22 #ifndef KALDI_CUDAMATRIX_CU_ALLOCATOR_H_    23 #define KALDI_CUDAMATRIX_CU_ALLOCATOR_H_    26 #include <cublas_v2.h>    28 #include <cuda_runtime_api.h>    68       cache_memory(true), memory_proportion(0.5), num_subregions(20) { }
    71     po->
Register(
"cuda-cache-memory", &cache_memory, 
"True if you want "    72                  "to use the caching allocator.  Set this to false only if you "    73                  "want to use cuda-memcheck or cuda-gdb; it will be slower.");
    74     po->
Register(
"cuda-memory-proportion", &memory_proportion,
    75                  "Proportion of the GPU device memory that the allocator "    76                  "should allocate at the start");
    81     KALDI_ASSERT(memory_proportion >= 0.05 && memory_proportion < 0.99);
   // NOTE(review): this listing looks like a lossy extract -- the numeric
   // prefixes appear to be original source line numbers, and the lines between
   // them (closing braces, parts of inline bodies, nested struct headers) are
   // not visible here.  Comments below describe only what is visible.
   //
   // Allocator class exposing Malloc / MallocPitch / Free, plus *Locking
   // variants that acquire mutex_ before doing their work.  Presumably this is
   // the GPU allocator that the "cuda-cache-memory" option refers to -- confirm.
   153 class CuMemoryAllocator {
   // Allocation entry point: takes a size, returns an untyped pointer.
   157   void* Malloc(
size_t size);
   // Pitched (row-oriented) allocation entry point: takes the bytes per row
   // and the number of rows.  `pitch` is a non-const pointer, presumably an
   // output that receives the row stride -- confirm against the definition.
   160   void* MallocPitch(
size_t row_bytes, 
size_t num_rows, 
size_t *pitch);
   // Releases a pointer; presumably one previously returned by Malloc or
   // MallocPitch -- confirm.
   163   void Free(
void *ptr);
   // Acquires mutex_ for the duration of the call.  The rest of the body is
   // not visible; presumably it forwards to Malloc(size) the same way
   // MallocPitchLocking forwards to MallocPitch -- TODO confirm.
   166   inline void* MallocLocking(
size_t size) {
   167     std::unique_lock<std::mutex> lock(mutex_);
   // Holds mutex_ while forwarding all three arguments unchanged to
   // MallocPitch, and returns its result.
   171   inline void* MallocPitchLocking(
size_t row_bytes, 
size_t num_rows, 
size_t *pitch) {
   172     std::unique_lock<std::mutex> lock(mutex_);
   173     return MallocPitch(row_bytes, num_rows, pitch);
   // Acquires mutex_ for the duration of the call.  The rest of the body is
   // not visible; presumably it forwards to Free(ptr) -- TODO confirm.
   176   void FreeLocking(
void *ptr) {
   177     std::unique_lock<std::mutex> lock(mutex_);
   // Const member; implementation not visible here.
   181   void PrintMemoryUsage() 
const;
   // Returns the current value of allocated_memory_.  The visible body does
   // not take mutex_.
   184   size_t GetAllocatedMemory() { 
return allocated_memory_; }
   // Returns the current value of max_allocated_memory_.  The visible body
   // does not take mutex_.
   187   size_t GetMaxAllocatedMemory() { 
return max_allocated_memory_; }
   196   ~CuMemoryAllocator();
   // The next two fields belong to a nested struct whose header is not
   // visible in this extract (presumably MemoryBlock -- confirm).
   205     SubRegion *subregion;  
   215     std::thread::id thread_id;  
   // Nested record holding a pointer to a SubRegion and a pointer to a
   // MemoryBlock; other members, if any, are not visible here.
   238   struct MemoryRegion {
   241     SubRegion *subregion_begin;  
   243     MemoryBlock *block_begin;  
   // The next three fields belong to a nested struct whose header is not
   // visible (presumably SubRegion -- confirm).
   260     size_t memory_region;  
   263     size_t subregion_index;  
   // std::set of (size_t, MemoryBlock*) pairs, so entries are kept ordered by
   // the size_t first and the pointer second.  Presumably the size_t is the
   // block size -- confirm.
   270     std::set<std::pair<size_t, MemoryBlock*> > free_blocks;
   // Internal helpers; only the declarations are visible here.
   279   inline void* MallocInternal(
size_t size);
   283   inline void* MallocFromSubregion(SubRegion *subregion, 
size_t size);
   294   inline MemoryBlock *SplitBlock(MemoryBlock *block, 
size_t size);
   298   void RemoveFromFreeBlocks(MemoryBlock *block);
   302   void AddToFreeBlocks(MemoryBlock *block);
   308   void AllocateNewRegion(
size_t size);
   313   void SortSubregions();
   // Data members.
   319   std::vector<MemoryRegion> memory_regions_;
   321   std::vector<SubRegion*> subregions_;
   // Presumably one entry per element of subregions_ -- confirm.
   328   std::vector<size_t> largest_free_block_;
   331   size_t synchronize_gpu_t_;     
   333   size_t num_synchronizations_;  
   334   double tot_time_taken_;  
   335   double malloc_time_taken_;  
   // Maps an untyped pointer to its MemoryBlock; presumably keyed by the
   // pointers handed out to callers -- confirm.
   339   std::unordered_map<void*, MemoryBlock*> allocated_block_map_;
   // Value returned by GetMaxAllocatedMemory().
   347   size_t max_allocated_memory_;
   // Value returned by GetAllocatedMemory().
   348   size_t allocated_memory_;
   // Free function returning a std::string and taking two non-const int64
   // pointers.  Presumably it writes the free and total GPU memory amounts
   // through `free` and `total` and returns a printable summary -- confirm
   // against the definition, which is not visible here.
   357 std::string GetFreeGpuMemory(int64* free, int64* total);
   // Declaration only (extern): the global CuMemoryAllocator instance is
   // defined in another translation unit.
   359 extern CuMemoryAllocator g_cuda_allocator;
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
CuAllocatorOptions g_allocator_options
 
void Register(OptionsItf *po)
 
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
 
BaseFloat memory_proportion
 
#define KALDI_ASSERT(cond)
 
void RegisterCuAllocatorOptions(OptionsItf *po)