44 #include "ngram_model_internal.h" 45 #include "lm_trie_quant.h" 47 #define FLOAT_INF (0x7f800000) 55 bins_t tables[NGRAM_MAX_ORDER - 1][2];
/*
 * bins_create: describe a table of bin centers laid over storage supplied
 * by the caller.
 *
 * bins  - table descriptor to fill in
 * bits  - width of the quantized code; the table spans (1 << bits) floats
 * begin - first float of the storage backing the table
 *
 * NOTE(review): the statement that stores `begin` into bins->begin is not
 * visible in this listing; the line below relies on it having been set.
 */
66 bins_create(
bins_t * bins, uint8 bits,
float *begin)
/* One-past-the-end pointer: (1 << bits) floats after the first center. */
69 bins->end = bins->begin + (1ULL << bits);
/*
 * lower_bound: search helper over the float range [first, last) for `val`.
 *
 * Only the parameter list is visible in this listing.
 * NOTE(review): presumably returns a pointer to the first element that is
 * not less than `val` (or `last` if none), as the name and its use in
 * bins_encode suggest; confirm against the full body.
 */
73 lower_bound(
float *first,
const float *last,
float val)
/*
 * bins_encode: map a float onto the index of a bin center in `bins`.
 *
 * bins  - table of centers, searched over [bins->begin, bins->end)
 * value - value to quantize
 *
 * Returns an index relative to bins->begin.
 *
 * NOTE(review): the statement executed when `above == bins->begin` is not
 * visible in this listing.
 */
95 bins_encode(
bins_t * bins,
float value)
97 float *above = lower_bound(bins->begin, bins->end, value);
98 if (above == bins->begin)
/* The search ran off the end of the table: use the last index. */
100 if (above == bins->end)
101 return bins->end - bins->begin - 1;
/*
 * Choose between `above` and its predecessor: the comparison evaluates to 1
 * when `value` is strictly closer to *(above - 1) than to *above, which
 * steps the returned index back by one.
 */
102 return above - bins->begin - (value - *(above - 1) < *above - value);
/*
 * bins_decode: return the bin center stored at index `off` of the table.
 *
 * bins - table of centers
 * off  - index relative to bins->begin; no range check against bins->end is
 *        performed in the visible code, so callers must pass a valid index
 */
106 bins_decode(
bins_t * bins,
size_t off)
108 return bins->begin[off];
/*
 * quant_size: number of bytes needed to hold every bin table of a model of
 * the given order.
 *
 * order - n-gram order of the model
 *
 * The total is (order - 2) "middle" tables plus one "longest" table.
 *
 * NOTE(review): the declarations of prob_bits and bo_bits are not visible in
 * this listing.
 * NOTE(review): (order - 2) is multiplied by a size_t, so an order below 2
 * would convert a negative int to a very large unsigned value; confirm that
 * callers never pass order < 2.
 */
112 quant_size(
int order)
/* Probability-only table: (1 << prob_bits) float centers. */
116 size_t longest_table = (1
U << prob_bits) *
sizeof(
float);
/* Middle table: (1 << bo_bits) backoff centers plus a probability table. */
117 size_t middle_table = (1
U << bo_bits) *
sizeof(
float) + longest_table;
119 return (order - 2) * middle_table + longest_table;
/*
 * lm_trie_quant_create: build the quantizer state for a model of the given
 * order.
 *
 * order - n-gram order of the model
 *
 * Visible steps: size the storage with quant_size(order), allocate it with
 * ckd_calloc, set the probability code width and both bit masks, then carve
 * the storage into per-order bin tables with bins_create.
 *
 * NOTE(review): several statements (allocation of `quant`, the target of the
 * ckd_calloc result, the bo_bits assignment, the return) are not visible in
 * this listing.
 * NOTE(review): `tables` is declared with NGRAM_MAX_ORDER - 1 rows and the
 * code indexes tables[order - 2]; no check of `order` against that bound is
 * visible here.
 */
123 lm_trie_quant_create(
int order)
129 quant->mem_size = quant_size(order);
131 (uint8 *)
ckd_calloc(quant->mem_size,
sizeof(*quant->mem));
/* Probabilities are quantized to 16-bit codes. */
133 quant->prob_bits = 16;
/* Masks with the low prob_bits / bo_bits bits set. */
135 quant->prob_mask = (1
U << quant->prob_bits) - 1;
136 quant->bo_mask = (1
U << quant->bo_bits) - 1;
/* Walk the allocated storage as an array of floats. */
138 start = (
float *) (quant->mem);
/*
 * For each of the first order - 2 rows: a probability table followed
 * directly by a backoff table.
 */
139 for (i = 0; i < order - 2; i++) {
140 bins_create(&quant->tables[i][0], quant->prob_bits, start);
141 start += (1ULL << quant->prob_bits);
142 bins_create(&quant->tables[i][1], quant->bo_bits, start);
143 start += (1ULL << quant->bo_bits);
/* The final row gets a probability table only, also reachable as `longest`. */
145 bins_create(&quant->tables[order - 2][0], quant->prob_bits, start);
146 quant->longest = &quant->tables[order - 2][0];
/*
 * lm_trie_quant_read_bin: load quantizer tables from an open binary stream.
 *
 * fp    - stream positioned at the quantizer section
 * order - n-gram order of the model, forwarded to lm_trie_quant_create
 *
 * Visible steps: read one leading item into `dummy`, create the quantizer
 * for `order`, then read quant->mem_size elements directly into quant->mem.
 *
 * NOTE(review): both fread calls discard their return value, so a short or
 * failed read would go unnoticed in the visible code; consider checking the
 * item counts.
 */
152 lm_trie_quant_read_bin(FILE * fp,
int order)
157 fread(&dummy,
sizeof(dummy), 1, fp);
158 quant = lm_trie_quant_create(order);
159 fread(quant->mem,
sizeof(*quant->mem), quant->mem_size, fp);
169 fwrite(&dummy,
sizeof(dummy), 1, fp);
170 fwrite(quant->mem,
sizeof(*quant->mem), quant->mem_size, fp);
/**
 * qsort() comparator that orders two floats ascending.
 *
 * The values are compared directly rather than by truncating their
 * difference to int: a truncated difference reports any two values that are
 * less than 1.0 apart as equal, which leaves them unsorted, and converting a
 * float outside the range of int is undefined behaviour.
 *
 * @param a pointer to the first float
 * @param b pointer to the second float
 * @return a negative value, zero, or a positive value when *a is less than,
 *         equal to, or greater than *b
 */
static int
weights_comparator(const void *a, const void *b)
{
    const float lhs = *(const float *) a;
    const float rhs = *(const float *) b;

    return (lhs > rhs) - (lhs < rhs);
}
/*
 * make_bins: derive `bins` bin centers from a set of sample values.
 *
 * values     - samples; sorted in place by this function
 * values_num - number of samples
 * centers    - output array receiving one center per bin
 * bins       - number of centers to produce
 *
 * The sorted samples are cut into `bins` consecutive slices of near-equal
 * count, and each center is the mean of its slice.
 *
 * NOTE(review): the initialisation of `start` and the accumulation inside
 * the inner loop are not visible in this listing.
 */
200 make_bins(
float *values, uint32 values_num,
float *centers, uint32 bins)
202 float *finish, *start;
205 qsort(values, values_num,
sizeof(*values), &weights_comparator);
207 for (i = 0; i < bins; i++, centers++, start = finish) {
/* End of slice i; the product is widened to 64 bits before dividing. */
208 finish = values + (size_t) ((uint64) values_num * (i + 1) / bins);
209 if (finish == start) {
/*
 * Empty slice: repeat the previous center, or use -FLOAT_INF for the first.
 * NOTE(review): FLOAT_INF is defined in this file as the integer constant
 * 0x7f800000, so -FLOAT_INF converts to a float of about -2.1e9 rather than
 * IEEE negative infinity; confirm this is intended.
 */
211 *centers = i ? *(centers - 1) : -FLOAT_INF;
216 for (ptr = start; ptr != finish; ptr++) {
/* Mean of the slice. */
219 *centers = sum / (float) (finish - start);
235 probs = (
float *)
ckd_calloc(counts,
sizeof(*probs));
236 backoffs = (
float *)
ckd_calloc(counts,
sizeof(*backoffs));
237 raw_ngrams_end = raw_ngrams + counts;
239 for (backoff_num = 0, prob_num = 0; raw_ngrams != raw_ngrams_end;
241 probs[prob_num++] = raw_ngrams->prob;
242 backoffs[backoff_num++] = raw_ngrams->backoff;
245 make_bins(probs, prob_num, quant->tables[order - 2][0].begin,
246 1ULL << quant->prob_bits);
247 centers = quant->tables[order - 2][1].begin;
248 make_bins(backoffs, backoff_num, centers, (1ULL << quant->bo_bits));
/*
 * lm_trie_quant_train_prob: train the probability bins for one n-gram order
 * (no backoff table is touched in the visible code).
 *
 * quant  - quantizer whose tables[order - 2][0] centers are written
 * order  - n-gram order being trained
 * counts - number of raw n-grams to read
 *
 * Visible steps: allocate a temporary array of `counts` floats, copy the
 * `prob` field of each raw n-gram into it, then call make_bins to produce
 * (1 << prob_bits) centers.
 *
 * NOTE(review): the remaining parameter(s), including the declaration of
 * raw_ngrams, and the release of `probs` are not visible in this listing.
 */
254 lm_trie_quant_train_prob(
lm_trie_quant_t * quant,
int order, uint32 counts,
261 probs = (
float *)
ckd_calloc(counts,
sizeof(*probs));
262 raw_ngrams_end = raw_ngrams + counts;
264 for (prob_num = 0; raw_ngrams != raw_ngrams_end; raw_ngrams++) {
265 probs[prob_num++] = raw_ngrams->prob;
268 make_bins(probs, prob_num, quant->tables[order - 2][0].begin,
269 1ULL << quant->prob_bits);
275 int order_minus_2,
float prob,
float backoff)
278 (uint64) ((bins_encode
279 (&quant->tables[order_minus_2][0],
281 bo_bits) | bins_encode(&quant->
293 (uint32) bins_encode(quant->longest, prob));
300 return bins_decode(&quant->tables[order_minus_2][1],
309 address.offset += quant->bo_bits;
310 return bins_decode(&quant->tables[order_minus_2][0],
318 return bins_decode(quant->longest,
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Sphinx's memory allocation/deallocation routines.
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT void bitarr_write_int25(bitarr_address_t address, uint8 length, uint32 value)
Write specified value into bit array.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Structure that stores address of certain value in bit array.
SPHINXBASE_EXPORT uint32 bitarr_read_int25(bitarr_address_t address, uint8 length, uint32 mask)
Read uint32 value from bit array.
SPHINXBASE_EXPORT void bitarr_write_int57(bitarr_address_t address, uint8 length, uint64 value)
Write specified value into bit array.
Implementation of logging routines.