Memory.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


/*****************************************************************************
*** Platform checks for aligned malloc functions ***
*****************************************************************************/

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question of how to detect 64-bit. According to this document,
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so it is indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if defined(__APPLE__) \
 || defined(_WIN64) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
// Currently, let's include it only on unix systems:
#if defined(__unix__) || defined(__unix)
  #include <unistd.h>
  #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
    #define EIGEN_HAS_POSIX_MEMALIGN 1
  #endif
#endif

#ifndef EIGEN_HAS_POSIX_MEMALIGN
  #define EIGEN_HAS_POSIX_MEMALIGN 0
#endif

#ifdef EIGEN_VECTORIZE_SSE
  #define EIGEN_HAS_MM_MALLOC 1
#else
  #define EIGEN_HAS_MM_MALLOC 0
#endif

namespace Eigen {

namespace internal {

inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
  throw std::bad_alloc();
  #else
  // When exceptions are disabled, trigger the failure path of operator new[]
  // by requesting an impossibly large allocation.
  std::size_t huge = -1;
  new int[huge];
  #endif
}

/*****************************************************************************
*** Implementation of handmade aligned functions ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

/** \internal Allocates \a size bytes and returns a 16-byte aligned pointer.
  * The pointer returned by std::malloc is stored just before the returned address
  * so that handmade_aligned_free can recover it.
  */
inline void* handmade_aligned_malloc(std::size_t size)
{
  void *original = std::malloc(size+16);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/** \internal Frees memory allocated with handmade_aligned_malloc. */
inline void handmade_aligned_free(void *ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

/** \internal Reallocates an aligned block obtained with handmade_aligned_malloc.
  * Since the handmade version is built on top of std::malloc, we can rely on std::realloc
  * and simply move the data to the new aligned position when the offset changes.
  */
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+16);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
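
// Illustrative sketch (not part of the header's API): how the bookkeeping above plays out.
// Assume std::malloc(size+16) returns 0x1008 (std::malloc is assumed to return memory
// aligned to at least sizeof(void*)). Then
//   aligned = (0x1008 & ~15) + 16 = 0x1010,
// which is 16-byte aligned, and the 8 bytes of slack in front are used to stash the
// original pointer so handmade_aligned_free can recover it:
//
//   void* p = handmade_aligned_malloc(100);   // 16-byte aligned block of >= 100 bytes
//   p = handmade_aligned_realloc(p, 200);     // still 16-byte aligned, data preserved
//   handmade_aligned_free(p);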

/*****************************************************************************
*** Implementation of generic aligned realloc (when no realloc can be used) ***
*****************************************************************************/

void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);

/** \internal Reallocation fallback for platforms whose aligned allocator has no native
  * realloc: allocate a new aligned buffer, copy the old data, and free the old buffer.
  */
inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
{
  if (ptr==0)
    return aligned_malloc(size);

  if (size==0)
  {
    aligned_free(ptr);
    return 0;
  }

  void* newptr = aligned_malloc(size);
  if (newptr == 0)
  {
    #ifdef EIGEN_HAS_ERRNO
    errno = ENOMEM; // according to the standard
    #endif
    return 0;
  }

  if (ptr != 0)
  {
    std::memcpy(newptr, ptr, (std::min)(size,old_size));
    aligned_free(ptr);
  }

  return newptr;
}

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc ***
*****************************************************************************/

#ifdef EIGEN_NO_MALLOC
inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update)
    value = new_value;
  return value;
}
inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
inline void check_that_malloc_is_allowed()
{}
#endif

/** \internal Allocates \a size bytes. When alignment is enabled (EIGEN_ALIGN), the returned
  * pointer is guaranteed to have 16-byte alignment. On allocation failure, std::bad_alloc
  * is thrown via throw_std_bad_alloc().
  */
inline void* aligned_malloc(size_t size)
{
  check_that_malloc_is_allowed();

  void *result;
  #if !EIGEN_ALIGN
    result = std::malloc(size);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::malloc(size);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    if(posix_memalign(&result, 16, size)) result = 0;
  #elif EIGEN_HAS_MM_MALLOC
    result = _mm_malloc(size, 16);
  #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
    result = _aligned_malloc(size, 16);
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

/** \internal Frees memory allocated with aligned_malloc. */
inline void aligned_free(void *ptr)
{
  #if !EIGEN_ALIGN
    std::free(ptr);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    std::free(ptr);
  #elif EIGEN_HAS_MM_MALLOC
    _mm_free(ptr);
  #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
    _aligned_free(ptr);
  #else
    handmade_aligned_free(ptr);
  #endif
}

/** \internal Reallocates an aligned block of memory.
  * On allocation failure, std::bad_alloc is thrown via throw_std_bad_alloc().
  */
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size);

  void *result;
#if !EIGEN_ALIGN
  result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
#elif EIGEN_HAS_POSIX_MEMALIGN
  result = generic_aligned_realloc(ptr,new_size,old_size);
#elif EIGEN_HAS_MM_MALLOC
  // The defined(_mm_free) is just here to verify that this MSVC version
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
  // functions. This may not always be the case, so we just try to be safe.
  #if defined(_MSC_VER) && defined(_mm_free)
    result = _aligned_realloc(ptr,new_size,16);
  #else
    result = generic_aligned_realloc(ptr,new_size,old_size);
  #endif
#elif defined(_MSC_VER)
  result = _aligned_realloc(ptr,new_size,16);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}
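
// A minimal usage sketch (illustrative only): these internal helpers behave like
// malloc/realloc/free, except that the returned storage is 16-byte aligned whenever
// EIGEN_ALIGN is enabled, and failures surface through throw_std_bad_alloc().
//
//   using namespace Eigen::internal;
//   float* buf = static_cast<float*>(aligned_malloc(64 * sizeof(float)));
//   buf = static_cast<float*>(aligned_realloc(buf, 128 * sizeof(float), 64 * sizeof(float)));
//   aligned_free(buf);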

/*****************************************************************************
*** Implementation of conditionally aligned functions ***
*****************************************************************************/

/** \internal Allocates \a size bytes. If Align is true, the returned pointer is
  * 16-byte aligned (see aligned_malloc); otherwise plain std::malloc is used.
  */
template<bool Align> inline void* conditional_aligned_malloc(size_t size)
{
  return aligned_malloc(size);
}

template<> inline void* conditional_aligned_malloc<false>(size_t size)
{
  check_that_malloc_is_allowed();

  void *result = std::malloc(size);
  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

/** \internal Frees memory allocated with conditional_aligned_malloc. */
template<bool Align> inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> inline void conditional_aligned_free<false>(void *ptr)
{
  std::free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
{
  return std::realloc(ptr, new_size);
}

/*****************************************************************************
*** Construction/destruction of array elements ***
*****************************************************************************/

/** \internal Constructs the elements of an array.
  * The \a size parameter tells how many objects to default-construct.
  */
template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size)
{
  for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
  return ptr;
}

/** \internal Destructs the elements of an array.
  * The \a size parameter tells how many objects to destruct.
  */
template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

/*****************************************************************************
*** Implementation of aligned new/delete-like functions ***
*****************************************************************************/

template<typename T>
EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
  if(size > size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}
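
// Rationale (illustrative): the byte count passed to the allocators below is sizeof(T)*size.
// If size exceeds size_t(-1)/sizeof(T), that product wraps around modulo 2^N and a
// far-too-small buffer would silently be allocated. For example, with sizeof(T) == 8 on a
// 32-bit system, size == 0x20000001 gives 8*0x20000001 mod 2^32 == 8 bytes; the check above
// turns this case into bad_alloc instead.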

/** \internal Allocates \a size objects of type T with 16-byte alignment (when EIGEN_ALIGN
  * is enabled) and default-constructs them. Throws std::bad_alloc on failure.
  */
template<typename T> inline T* aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  return construct_elements_of_array(result, size);
}

template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  return construct_elements_of_array(result, size);
}

/** \internal Deletes objects constructed with aligned_new.
  * The \a size parameter must match the one passed to aligned_new.
  */
template<typename T> inline void aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  aligned_free(ptr);
}
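
// Illustrative pairing (not an exhaustive contract): aligned_new both allocates and
// default-constructs, so it must be matched with aligned_delete, not aligned_free.
//
//   float* v = Eigen::internal::aligned_new<float>(32);
//   // ... use v[0..31] ...
//   Eigen::internal::aligned_delete(v, 32);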

/** \internal Deletes objects constructed with conditional_aligned_new.
  * The \a size parameter must match the one passed to conditional_aligned_new.
  */
template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(new_size > old_size)
    construct_elements_of_array(result+old_size, new_size-old_size);
  return result;
}


// The *_auto variants skip construction/destruction for scalar types whose
// NumTraits<T>::RequireInitialization is false (e.g. plain arithmetic types).
template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
    construct_elements_of_array(result, size);
  return result;
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
    construct_elements_of_array(result+old_size, new_size-old_size);
  return result;
}

template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *ptr, size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

/****************************************************************************/

/** \internal Returns the index of the first element of the array that is well aligned
  * for vectorization.
  *
  * \param array the address of the start of the array
  * \param size the size of the array
  *
  * When vectorization is disabled (PacketSize==1), it simply returns 0.
  */
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
  // typedef typename packet_traits<Scalar>::type Packet;
  enum { PacketSize = packet_traits<Scalar>::size,
         PacketAlignedMask = PacketSize-1
  };

  if(PacketSize==1)
  {
    // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
    // of the array have the same alignment.
    return 0;
  }
  else if(size_t(array) & (sizeof(Scalar)-1))
  {
    // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
                            & PacketAlignedMask, size);
  }
}
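
// Worked example (illustrative): with Scalar = float and SSE enabled, PacketSize is 4 and
// PacketAlignedMask is 3. For an array starting at address 0x1008 (two floats past a
// 16-byte boundary), size_t(array)/sizeof(Scalar) ends in ...02, so
//   first_aligned = (4 - (2 & 3)) & 3 = 2,
// i.e. the element at index 2 (address 0x1010) is the first one on a 16-byte boundary.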


// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_copy_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
};

template<typename T> struct smart_copy_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  { std::copy(start, end, target); }
};
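
// Usage sketch (illustrative): smart_copy has the semantics of std::copy, but for scalar
// types whose NumTraits<T>::RequireInitialization is false it lowers to a single memcpy.
//
//   float src[8] = {0}, dst[8];
//   Eigen::internal::smart_copy(src, src + 8, dst);   // memcpy path for float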


/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc) ***
*****************************************************************************/

// You can override Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function.
#ifndef EIGEN_ALLOCA
  #if (defined __linux__)
    #define EIGEN_ALLOCA alloca
  #elif defined(_MSC_VER)
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leaks in case of exceptions.
template<typename T> class aligned_stack_memory_handler
{
  public:
    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
     * Note that \a ptr can be 0 regardless of the other parameters.
     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
     * In this case, the buffer elements will also be destructed when this handler is destructed.
     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
     **/
    aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::construct_elements_of_array(m_ptr, size);
    }
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    size_t m_size;
    bool m_deallocate;
};

} // end namespace internal

/** \internal
  * Declares, allocates and constructs an aligned buffer named NAME of SIZE elements of type TYPE.
  * The buffer lives on the stack if its byte size is at most EIGEN_STACK_ALLOCATION_LIMIT and if
  * EIGEN_ALLOCA is available (by default, only on Linux and with Visual Studio); otherwise it is
  * heap allocated. If BUFFER is non-null, that buffer is used instead of allocating a new one.
  * The buffer is automatically destructed and freed when leaving the enclosing scope.
  */
#ifdef EIGEN_ALLOCA

  // On ARM, alloca cannot be assumed to return 16-byte aligned memory, so we over-allocate
  // and round up, as in handmade_aligned_malloc.
  #ifdef __arm__
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
  #else
    #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)

#endif
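
// Usage sketch (illustrative): inside Eigen this macro is typically used for temporary
// work buffers. EIGEN_STACK_ALLOCATION_LIMIT decides between alloca and aligned_malloc;
// either way, cleanup happens automatically when the scope ends.
//
//   void scale_copy(const float* in, float* out, int n)
//   {
//     ei_declare_aligned_stack_constructed_variable(float, tmp, n, 0);
//     for(int i=0; i<n; ++i) tmp[i] = 2.0f*in[i];
//     for(int i=0; i<n; ++i) out[i] = tmp[i];
//   }   // tmp is destructed/freed here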


/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/

#if EIGEN_ALIGN
  #ifdef EIGEN_EXCEPTIONS
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { return 0; } \
        return 0; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      }
  #endif

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0)))
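
// Usage sketch (illustrative, assuming the Core module defining Eigen::Vector4f is included):
// a class holding a fixed-size vectorizable Eigen member adds the macro so that
// heap-allocated instances get 16-byte aligned storage through the operators defined above.
//
//   struct MyBody
//   {
//     EIGEN_MAKE_ALIGNED_OPERATOR_NEW
//     Eigen::Vector4f velocity;
//   };
//
//   MyBody* b = new MyBody;   // uses the aligned operator new injected by the macro
//   delete b;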

/****************************************************************************/

/** \class aligned_allocator
  * \brief STL compatible allocator to use with types requiring 16-byte aligned storage.
  */
template<class T>
class aligned_allocator
{
public:
    typedef size_t          size_type;
    typedef std::ptrdiff_t  difference_type;
    typedef T*              pointer;
    typedef const T*        const_pointer;
    typedef T&              reference;
    typedef const T&        const_reference;
    typedef T               value_type;

    template<class U>
    struct rebind
    {
        typedef aligned_allocator<U> other;
    };

    pointer address( reference value ) const
    {
        return &value;
    }

    const_pointer address( const_reference value ) const
    {
        return &value;
    }

    aligned_allocator()
    {
    }

    aligned_allocator( const aligned_allocator& )
    {
    }

    template<class U>
    aligned_allocator( const aligned_allocator<U>& )
    {
    }

    ~aligned_allocator()
    {
    }

    size_type max_size() const
    {
        return (std::numeric_limits<size_type>::max)();
    }

    pointer allocate( size_type num, const void* hint = 0 )
    {
        EIGEN_UNUSED_VARIABLE(hint);
        internal::check_size_for_overflow<T>(num);
        return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
    }

    void construct( pointer p, const T& value )
    {
        ::new( p ) T( value );
    }

    // Support for c++11
#if (__cplusplus >= 201103L)
    template<typename... Args>
    void construct(pointer p, Args&&... args)
    {
        ::new(p) T(std::forward<Args>(args)...);
    }
#endif

    void destroy( pointer p )
    {
        p->~T();
    }

    void deallocate( pointer p, size_type /*num*/ )
    {
        internal::aligned_free( p );
    }

    bool operator!=(const aligned_allocator<T>& ) const
    { return false; }

    bool operator==(const aligned_allocator<T>& ) const
    { return true; }
};
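
// Usage sketch (illustrative, assuming the Core module defining Eigen::Vector4f is included
// along with <vector>): standard containers need this allocator whenever they store
// fixed-size vectorizable Eigen types, otherwise their elements may end up under-aligned.
//
//   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > points;
//   points.push_back(Eigen::Vector4f::Zero());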

//---------- Cache sizes ----------

#if !defined(EIGEN_NO_CPUID)
#  if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
#    if defined(__PIC__) && defined(__i386__)
       // Case for x86 with PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    else
       // Case for x86_64 or x86 w/o PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
#    endif
#  elif defined(_MSC_VER)
#    if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

// Compares the 12-character CPU vendor string (returned by CPUID leaf 0 in EBX, EDX, ECX)
// against \a vendor.
inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
{
  return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
}

// Queries the data/unified cache sizes (in bytes) through CPUID leaf 0x4
// (deterministic cache parameters).
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;        // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >> 0;  // B[11:0]
      int sets        = (abcd[2]);                    // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}
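
// Worked example (illustrative): an 8-way, 64-byte-line, 64-set L1 data cache with a single
// partition is reported by CPUID as ways=7, line_size=63, partitions=0, sets=63, so
//   cache_size = 8 * 1 * 64 * 64 = 32768 bytes = 32 KB.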

// Fallback for older CPUs: decodes the one-byte cache descriptors returned by CPUID
// leaf 0x2; sizes are accumulated in KB and converted to bytes at the end.
inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;    // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;   // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;   // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;   // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;   // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;   // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;   // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;   // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;    // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;   // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;   // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;  // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;  // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;  // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;  // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;  // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;  // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;    // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;  // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;  // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;  // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;  // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;  // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;  // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;  // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;  // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else
    queryCacheSizes_intel_codes(l1,l2,l3);
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000005,0);
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000006,0);
  l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
  l3 = ((abcd[3] & 0xFFFC0000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in 512 KB units
}
#endif

/** \internal
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively,
  * or -1 for each level when CPUID is not available.
  */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,"GenuineIntel"))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}

/** \internal
  * \returns the size in Bytes of the L1 data cache.
  */
inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

/** \internal
  * \returns the size in Bytes of the highest-level (L2 or L3) data cache.
  */
inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}
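
// Usage sketch (illustrative): cache sizes are reported in bytes; on builds where
// EIGEN_CPUID is not defined, the queries simply return -1.
//
//   int l1  = Eigen::internal::queryL1CacheSize();        // e.g. 32768 on many x86 CPUs
//   int top = Eigen::internal::queryTopLevelCacheSize();  // L2 or L3 size in bytes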

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H