src/vlc.c

Go to the documentation of this file.
00001 /*
00002    libit - Library for basic source and channel coding functions
00003    Copyright (C) 2005-2006 Vivien Chappelier, Herve Jegou
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public
00016    License along with this library; if not, write to the Free
00017    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018 */
00019 
00020 /*
00021   Variable length codes
00022   Copyright (C) 2005-2006 Herve Jegou
00023 */
00024 
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <string.h>
00028 #include <math.h>
00029 #include <it/vlc.h>
00030 #include <it/io.h>
00031 #include <it/math.h>
00032 
00033 /*------------------------------------------------------------------------------*/
00034 vlc_t *vlc_new (int n)
00035 {
00036   vlc_t *vlc = (vlc_t *) malloc (sizeof (vlc_t));
00037   assert (vlc);
00038 
00039   vlc->nb_symb = n;
00040   vlc->nb_nodes = 2 * n - 1;  /* By default, we assume that the kraft equality is verified */
00041   vlc->node_root = 2 * n - 2;
00042 
00043   vlc->cwd_length = ivec_new_zeros (vlc->nb_nodes);
00044   vlc->cwd = ivec_new_zeros (vlc->nb_nodes);
00045   vlc->map[0] = ivec_new (vlc->nb_nodes);
00046   vlc->map[1] = ivec_new (vlc->nb_nodes);
00047   ivec_set (vlc->map[0], DUMM_NODE);
00048   ivec_set (vlc->map[1], DUMM_NODE);
00049 
00050   return vlc;
00051 }
00052 
00053 
00054 /*------------------------------------------------------------------------------*/
00055 vlc_t *vlc_clone (const vlc_t * vlc)
00056 {
00057   vlc_t *v = (vlc_t *) malloc (sizeof (vlc_t *));
00058   assert (v);
00059 
00060   v->nb_symb = vlc->nb_symb;
00061   v->nb_nodes = vlc->nb_nodes;
00062   v->node_root = vlc->node_root;
00063 
00064   v->cwd_length = ivec_clone (v->cwd_length);
00065   v->cwd = ivec_clone (v->cwd);
00066   v->map[0] = ivec_clone (v->map[0]);
00067   v->map[1] = ivec_clone (v->map[0]);
00068 
00069   return v;
00070 }
00071 
00072 
00073 /*------------------------------------------------------------------------------*/
00074 void vlc_delete (vlc_t * vlc)
00075 {
00076   assert (vlc);
00077   ivec_delete (vlc->cwd_length);
00078   ivec_delete (vlc->cwd);
00079   ivec_delete (vlc->map[0]);
00080   ivec_delete (vlc->map[1]);
00081   free (vlc);
00082 }
00083 
00084 
00085 /*------------------------------------------------------------------------------*/
00086 vlc_t *vlc_flc (int nb_bits)
00087 {
00088   vlc_t *flc = vlc_new (1 << nb_bits);
00089   int  l, l_start, l_start_next;
00090   int  nl, child0, child1;
00091 
00092   for (l = 0; l < nb_bits; l++) {
00093 
00094     l_start = (1 << l) - 1;
00095     l_start_next = (1 << (l + 1)) - 1;
00096 
00097     for (nl = l_start; nl < l_start_next; nl++) {
00098       child1 = (nl - l_start) * 2 + l_start_next;
00099       child0 = (nl - l_start) * 2 + 1 + l_start_next;
00100 
00101       flc->map[0][flc->node_root - nl] = flc->node_root - child0;
00102       flc->map[1][flc->node_root - nl] = flc->node_root - child1;
00103     }
00104 
00105   }
00106 
00107   vlc_affect_codewords (flc);
00108   return flc;
00109 }
00110 
00111 /*------------------------------------------------------------------------------*/
00112 vlc_t *vlc_huffman (const vec pdf)
00113 {
00114   int  i, n, argmin1, argmin2;
00115   double min1, min2;
00116   vlc_t *vlc = vlc_new (Vec_length (pdf));
00117   vec  probas = vec_new_zeros (vlc->nb_nodes);  /* The probability of the nodes are included in probas */
00118   /* Active_symbols allows to know which symbol/node must be added to the tree */
00119   bvec active_symbols = bvec_new_zeros (vlc->nb_nodes);
00120   bvec_set_between (active_symbols, 0, vlc->nb_symb - 1, 1);
00121 
00122   for (i = 0; i < vlc->nb_symb; i++)
00123     probas[i] = pdf[i];
00124 
00125   /* Construction of the tree */
00126   for (n = vlc->nb_symb; n < vlc->nb_nodes; n++) {
00127     argmin1 = -1;
00128     argmin2 = -1;
00129     min1 = 1;
00130     min2 = 2;
00131     for (i = 0; i < vlc->nb_nodes; i++) {
00132       if (active_symbols[i]) {
00133   if (probas[i] < min1) {
00134     min2 = min1;
00135     argmin2 = argmin1;
00136     min1 = probas[i];
00137     argmin1 = i;
00138   }
00139   else if (probas[i] < min2) {
00140     min2 = probas[i];
00141     argmin2 = i;
00142   }
00143       }
00144     }
00145 
00146     vlc->map[0][n] = argmin1;
00147     vlc->map[1][n] = argmin2;
00148     active_symbols[argmin1] = 0;
00149     active_symbols[argmin2] = 0;
00150 
00151     /* Process the proba of the new symbol */
00152     probas[n] = probas[argmin1] + probas[argmin2];
00153 
00154     /* Let the new symbol be active and the symbols min1 and min2 be inactive */
00155     active_symbols[n] = 1;
00156   }
00157   vec_delete (probas);
00158   bvec_delete (active_symbols);
00159   vlc_affect_codewords (vlc);
00160   return vlc;
00161 }
00162 
00163 
00164 /*--------------------------------------------------------------*/
00165 /* Create the VLC code according to the Hu-Tucker algorithm
00166    The algorithm assumes that the probability distribution function is pdf.
00167    Note: this implementation could (and should) be optimized in terms of computionnal cost */
00168 vlc_t *vlc_hu_tucker (const vec pdf)
00169 {
00170   int  i, j, k, i1, i2, n, node1, node2;
00171   vlc_t *vlc = vlc_new (vec_length (pdf));
00172   ivec active_symbols = ivec_new_arithm (0, 1, vlc->nb_symb);
00173   vec  probas = vec_new_zeros (vlc->nb_nodes);  /* The probability of the nodes are included in probas */
00174   int  nrof_act = vlc->nb_symb; /* number of active symbols */
00175   vec  dumm_vec_symb = vec_new_1N (vlc->nb_symb);
00176 
00177   for (i = 0; i < vlc->nb_symb; i++)
00178     probas[i] = pdf[i];
00179 
00180   for (n = vlc->nb_symb; n < vlc->nb_nodes; n++) {
00181     /* At this point, all couples are assumed compatible    */
00182     ivec merg_comp = ivec_new_ones (nrof_act * nrof_act);
00183 
00184     /* Store the probability that is the smallest among compatible nodes and the corresponding indexes */
00185     ivec best_for_i = ivec_new (nrof_act);
00186     vec  best_for_i_prob = vec_new (nrof_act);
00187     ivec_set (best_for_i, DUMM_NODE);
00188     vec_set (best_for_i_prob, 1.1);
00189 
00190     /* Process the merging compatibilities */
00191     for (i = 0; i < nrof_act; i++) {
00192       for (j = i + 1; j < nrof_act; j++) {
00193   /* Test if there is an original block between i and j */
00194   for (k = i + 1; k < j; k++)
00195     if (vlc_is_leaf (vlc, active_symbols[k])) {
00196       merg_comp[i * nrof_act + j] = 0;
00197       merg_comp[j * nrof_act + i] = 0;
00198     }
00199       }
00200       /* A node can not be merged with itself */
00201       merg_comp[i * nrof_act + i] = 0;
00202     }
00203 
00204     /* Retrieve the couple corresponding to smallest probabilities */
00205     for (i = 0; i < nrof_act; i++)
00206       for (j = 0; j < nrof_act; j++)
00207 
00208   if (merg_comp[i * nrof_act + j] == 1)
00209     if (probas[active_symbols[j]] < best_for_i_prob[i]) {
00210       best_for_i[i] = j;
00211       best_for_i_prob[i] = probas[active_symbols[j]];
00212     }
00213 
00214     /* Retrieve at least one of the candidate couples */
00215     /* The new index is assigned the index n          */
00216     i1 = -1, i2 = -1;
00217     for (i = 0; i < nrof_act && i1 < 0; i++)
00218       for (j = 0; j < nrof_act && i1 < 0; j++)
00219   if (merg_comp[i * nrof_act + j]
00220       && (best_for_i[i] == j) && (best_for_i[j] == i)) {
00221     i1 = i;
00222     i2 = j;
00223   }
00224 
00225     node1 = active_symbols[i1];
00226     node2 = active_symbols[i2];
00227     vlc->map[0][n] = node1;
00228     vlc->map[1][n] = node2;
00229 
00230     /* Process the proba of the new symbol */
00231     probas[n] = probas[node1] + probas[node2];
00232 
00233     /* Update the list of active symbols */
00234     active_symbols[i1] = n;
00235     ivec_del (active_symbols, i2);
00236     ivec_delete (merg_comp);
00237     ivec_delete (best_for_i);
00238     vec_delete (best_for_i_prob);
00239 
00240     nrof_act--;
00241   }
00242 
00243   vlc_affect_codewords (vlc);
00244   vlc_quasi_lexicographic (vlc, pdf, dumm_vec_symb);
00245 
00246   vec_delete (dumm_vec_symb);
00247   vec_delete (probas);
00248   ivec_delete (active_symbols);
00249   return vlc;
00250 }
00251 
00252 
00253 /*------------------------------------------------------------------------------*/
00254 static void vlc_affect_codewords_subtree (vlc_t * vlc, int s)
00255 {
00256   /* Symbol is a dumm symbol */
00257   if (s == DUMM_NODE)
00258     return;
00259 
00260   /* Symbol is a leaf */
00261   if (!vlc_is_leaf (vlc, s)) {
00262     /* Otherwise, internal node */
00263     int  child0 = vlc_get_child0 (vlc, s);
00264     int  child1 = vlc_get_child1 (vlc, s);
00265 
00266     /* The child0 is a valid symbols/leaf */
00267     if (child0 != DUMM_NODE) {
00268       vlc->cwd_length[child0] = vlc->cwd_length[s] + 1;
00269       vlc->cwd[child0] = vlc->cwd[s];
00270       vlc_affect_codewords_subtree (vlc, child0);
00271     }
00272 
00273     /* Same story */
00274     if (child1 != DUMM_NODE) {
00275       vlc->cwd_length[child1] = vlc->cwd_length[s] + 1;
00276       vlc->cwd[child1] = vlc->cwd[s] + (1 << (vlc->cwd_length[s]));
00277       vlc_affect_codewords_subtree (vlc, child1);
00278     }
00279   }
00280 }
00281 
00282 
00283 /*------------------------------------------------------------------------------*/
00284 /* To call this function, the map table must be correctly set, and the 
00285    pointer of variables codes and cwd_length must be already allocated 
00286    (It is the case if Function vlc_new has been called)                         */
00287 void vlc_affect_codewords (vlc_t * vlc)
00288 {
00289   vlc->cwd[vlc->node_root] = 0;
00290   vlc->cwd_length[vlc->node_root] = 0;
00291   vlc_affect_codewords_subtree (vlc, vlc->node_root);
00292 }
00293 
00294 
00295 /*------------------------------------------------------------------------------*/
00296 int vlc_minh (const vlc_t * vlc)
00297 {
00298   int  the_min = 1000;
00299   int  i;
00300   for (i = 0; i < vlc->nb_symb; i++)
00301     if (the_min > vlc->cwd_length[i])
00302       the_min = vlc->cwd_length[i];
00303   return the_min;
00304 }
00305 
00306 
00307 /*------------------------------------------------------------------------------*/
00308 int vlc_maxh (const vlc_t * vlc)
00309 {
00310   int  the_max = 0;
00311   int  i;
00312   for (i = 0; i < vlc->nb_symb; i++)
00313     if (the_max < vlc->cwd_length[i])
00314       the_max = vlc->cwd_length[i];
00315   return the_max;
00316 }
00317 
00318 
00319 /*------------------------------------------------------------------------------*/
00320 double vlc_mdl (const vlc_t * vlc, const vec pdf)
00321 {
00322   double mdl = 0;
00323   int  i;
00324   for (i = 0; i < vlc->nb_symb; i++)
00325     mdl += vlc->cwd_length[i] * pdf[i];
00326   return mdl;
00327 }
00328 
00329 
00330 /*------------------------------------------------------------------------------*/
00331 double vlc_kraft_sum (const vlc_t * vlc)
00332 {
00333   double the_sum = 0;
00334   int  i;
00335   for (i = 0; i < vlc->nb_symb; i++) {
00336     the_sum += pow (2, -vlc->cwd_length[i]);
00337   }
00338 
00339   return the_sum;
00340 }
00341 
00342 
00343 /*------------------------------------------------------------------------------*/
00344 /* Tranform the tree in order to generate a quasi-lexicographic order           */
00345 void vlc_quasi_lexicographic (const vlc_t * vlc, const vec pdf,
00346             const vec symb)
00347 {
00348   int  i, l, curnode = vlc->nb_symb, child0, child1;
00349   ivec tmp_idx, tmp_idx_sub;  /* Node/Leaf index  */
00350   vec  tmp_rec, tmp_rec_sub;  /* Reconstruction value for nodes/symbols */
00351   int  nbtmpsymb;   /* Number of symbols/node in this layer   */
00352   int  maxh;
00353 
00354   vec  probas = vec_new_zeros (vlc->nb_nodes);  /* The probability of the nodes are included in probas */
00355   vec  symbols = vec_new_zeros (vlc->nb_nodes);
00356 
00357   assert (vlc);
00358   assert (vlc_kraft_sum (vlc) == 1);
00359 
00360   for (i = 0; i < vlc->nb_symb; i++) {
00361     probas[i] = pdf[i];
00362     symbols[i] = symb[i];
00363   }
00364   ivec_set_between (vlc->cwd_length, vlc->nb_symb, vlc->nb_nodes - 1, 0);
00365 
00366   maxh = vlc_maxh (vlc);
00367 
00368   /* The highest size is allocated by default                                   */
00369   tmp_idx = ivec_new (vlc->nb_nodes);
00370   tmp_rec = vec_new (vlc->nb_nodes);
00371 
00372   for (l = maxh; l >= 0; l--) {
00373     ivec sort_order;
00374 
00375     /* Retrieve the nodes of the current layer */
00376     nbtmpsymb = 0;
00377 
00378     for (i = 0; i < vlc->nb_nodes; i++)
00379       if (vlc->cwd_length[i] == l) {
00380   /* Is it a node/leaf of the focused layer? */
00381   tmp_idx[nbtmpsymb] = i;
00382   tmp_rec[nbtmpsymb] = symbols[i];
00383   nbtmpsymb++;
00384       }
00385 
00386     /* We have to sort the vectors tmp_idx and tmp_rec according to
00387        the order generated by values of tmp_rec
00388      */
00389     Vec_length (tmp_rec) = nbtmpsymb;
00390     sort_order = vec_sort_index (tmp_rec);
00391     Vec_length (tmp_rec) = vlc->nb_nodes;
00392 
00393     tmp_idx_sub = ivec_index_by (tmp_idx, sort_order);
00394     tmp_rec_sub = vec_index_by (tmp_rec, sort_order);
00395 
00396     memcpy (tmp_idx, tmp_idx_sub, nbtmpsymb * sizeof (*tmp_idx));
00397     memcpy (tmp_rec, tmp_rec_sub, nbtmpsymb * sizeof (*tmp_idx));
00398 
00399     /* Aggregation of symbols of the current layer by pairs */
00400     for (i = 0; i < nbtmpsymb / 2; i++) {
00401       child0 = tmp_idx[i * 2];
00402       child1 = tmp_idx[i * 2 + 1];
00403 
00404       /* Update the map table */
00405       vlc->map[0][curnode] = child0;
00406       vlc->map[1][curnode] = child1;
00407 
00408       /* probas, expectation, length,  of this node */
00409       probas[curnode] = probas[child0] + probas[child1];
00410       symbols[curnode] = (probas[child0] * symbols[child0]
00411         +
00412         probas[child1] * symbols[child1]) /
00413   (probas[curnode]);
00414       vlc->cwd_length[curnode] = vlc->cwd_length[child0] - 1;
00415 
00416       /* update the structure used for the next sort */
00417       tmp_idx[curnode] = curnode;
00418       tmp_rec[curnode] = symbols[curnode];
00419 
00420       curnode++;
00421     }
00422 
00423     ivec_delete (tmp_idx_sub);
00424     vec_delete (tmp_rec_sub);
00425     ivec_delete (sort_order);
00426   }
00427 
00428   vlc->cwd[vlc->node_root] = 0;
00429 
00430   for (i = vlc->nb_nodes - 1; i >= vlc->nb_symb; i--) {
00431     child0 = vlc->map[0][i];
00432     child1 = vlc->map[1][i];
00433     vlc->cwd[child0] = vlc->cwd[i];
00434     vlc->cwd[child1] = vlc->cwd[i] + (1 << (vlc->cwd_length[i]));
00435   }
00436 
00437   ivec_delete (tmp_idx);
00438   vec_delete (tmp_rec);
00439   vec_delete (probas);
00440   vec_delete (symbols);
00441 }
00442 
00443 
00444 /*------------------------------------------------------------------------------*/
00445 /* Return the respective probabilities of the nodes                              */
00446 
00447 static void vlc_nodes_pdf_tmp (const vlc_t * vlc, int node, vec pdf)
00448 {
00449   int  child0, child1;
00450 
00451   if (node < vlc->nb_symb)
00452     return;
00453 
00454   child0 = vlc_get_child0 (vlc, node);
00455   child1 = vlc_get_child1 (vlc, node);
00456 
00457   vlc_nodes_pdf_tmp (vlc, child0, pdf);
00458   vlc_nodes_pdf_tmp (vlc, child1, pdf);
00459 
00460   if (child0 == DUMM_NODE) {
00461     pdf[node] = pdf[child1];
00462   }
00463   else if (child1 == DUMM_NODE) {
00464     pdf[node] = pdf[child0];
00465   }
00466   else {
00467     pdf[node] = pdf[child0] + pdf[child1];
00468   }
00469 }
00470 
00471 
00472 vec vlc_nodes_pdf (const vlc_t * vlc, vec pdf)
00473 {
00474   int  i;
00475   vec  probas = vec_new_zeros (vlc->nb_nodes);  /* The probability of the nodes are included in probas */
00476 
00477   for (i = 0; i < vlc->nb_symb; i++)
00478     probas[i] = pdf[i];
00479 
00480   vlc_nodes_pdf_tmp (vlc, vlc->node_root, probas);
00481   return probas;
00482 }
00483 
00484 
00485 vec vlc_nodes_proba0 (const vlc_t * vlc, vec pdf)
00486 {
00487   int  i, child0, child1;
00488   vec  nodes_pdf = vlc_nodes_pdf (vlc, pdf);  /* The probability of the nodes are included in probas */
00489   vec  proba0 = vec_new (vlc->nb_nodes - vlc->nb_symb);
00490   double pchild0, pchild1;
00491 
00492   for (i = vlc->nb_symb; i < vlc->nb_nodes; i++) {
00493     child0 = vlc->map[0][i];
00494     child1 = vlc->map[1][i];
00495 
00496     if (child0 != DUMM_NODE)
00497       pchild0 = nodes_pdf[child0];
00498     else
00499       pchild0 = 0;
00500 
00501     if (child1 != DUMM_NODE)
00502       pchild1 = nodes_pdf[child1];
00503     else
00504       pchild1 = 0;
00505 
00506     proba0[i - vlc->nb_symb] = pchild0 / (pchild0 + pchild1);
00507   }
00508 
00509   vec_delete (nodes_pdf);
00510   return proba0;
00511 }
00512 
00513 
00514 /*------------------------------------------------------------------------------*/
00515 /* Return the respective expectations of the nodes                              */
00516 
00517 static void vlc_nodes_expectation_tmp (const vlc_t * vlc,
00518                int node, vec pdf, vec symbols)
00519 {
00520   int  child0, child1;
00521 
00522   if (node < vlc->nb_symb)
00523     return;
00524 
00525   child0 = vlc_get_child0 (vlc, node);
00526   child1 = vlc_get_child1 (vlc, node);
00527 
00528   vlc_nodes_expectation_tmp (vlc, child0, pdf, symbols);
00529   vlc_nodes_expectation_tmp (vlc, child1, pdf, symbols);
00530 
00531   if (child0 == DUMM_NODE) {
00532     pdf[node] = pdf[vlc_get_child1 (vlc, node)];
00533     symbols[node] = symbols[vlc_get_child1 (vlc, node)];
00534   }
00535   else if (child1 == DUMM_NODE) {
00536     pdf[node] = pdf[child0];
00537     symbols[node] = symbols[child0];
00538   }
00539   else {
00540     pdf[node] = pdf[child0] + pdf[child1];
00541     symbols[node] = (symbols[child0] * pdf[child0]
00542          + symbols[child1] * pdf[child1]) / pdf[node];
00543   }
00544 }
00545 
00546 
00547 vec vlc_nodes_expectation (const vlc_t * vlc, vec pdf, vec symb)
00548 {
00549   int  i;
00550   vec  probas = vec_new_zeros (vlc->nb_nodes);  /* The probability of the nodes are included in probas */
00551   vec  symbols = vec_new_zeros (vlc->nb_nodes);
00552 
00553   for (i = 0; i < vlc->nb_symb; i++) {
00554     probas[i] = pdf[i];
00555     symbols[i] = symb[i];
00556   }
00557 
00558   vlc_nodes_expectation_tmp (vlc, vlc->node_root, probas, symbols);
00559   vec_delete (probas);
00560   return symbols;
00561 }
00562 
00563 
00564 /*------------------------------------------------------------------*/
00565 vec vlc_nodes_entropy (const vlc_t * vlc, vec pdf)
00566 {
00567   int  i, child0, child1;
00568   vec  probas = vlc_nodes_pdf (vlc, pdf);
00569   vec  H = vec_new_zeros (vec_length (probas));
00570   double p0, p1;
00571 
00572   for (i = vlc->nb_symb; i < vlc->nb_nodes; i++) {
00573     child0 = vlc_get_child0 (vlc, i);
00574     child1 = vlc_get_child1 (vlc, i);
00575 
00576     if (child0 == DUMM_NODE || child1 == DUMM_NODE || probas[child0] == 0
00577   || probas[child1] == 0 || probas[i] == 0)
00578       H[i] = 0;
00579     else {
00580       p0 = probas[child0] / probas[i];
00581       p1 = probas[child1] / probas[i];
00582       H[i] = -p0 * log2 (p0) - p1 * log2 (p1);
00583     }
00584   }
00585 
00586   vec_delete (probas);
00587   return H;
00588 }
00589 
00590 
00591 /*------------------------------------------------------------------*/
00592 /* Return the respective variance of the nodes                      */
00593 
00594 /* process the contribution of the leaves of the node node to the expectation of 
00595    the mean error corresponding to the reconstruction value rec_value )         */
00596 static double vlc_node_variance_tmp (const vlc_t * vlc,
00597              int node,
00598              double rec_value,
00599              vec pdf, vec symb, double *p_sum_pdf)
00600 {
00601   double r = 0;
00602   int  child0, child1;
00603 
00604   if (node < vlc->nb_symb) {
00605     *p_sum_pdf += pdf[node];
00606     return pdf[node] * (symb[node] - rec_value) * (symb[node] - rec_value);
00607   }
00608 
00609   /* At this point, we know that we do not consider a leaf */
00610   child0 = vlc_get_child0 (vlc, node);
00611   child1 = vlc_get_child1 (vlc, node);
00612 
00613   if (child0 != DUMM_NODE)
00614     r += vlc_node_variance_tmp (vlc, child0, rec_value, pdf, symb, p_sum_pdf);
00615 
00616   if (child1 != DUMM_NODE)
00617     r += vlc_node_variance_tmp (vlc, child1, rec_value, pdf, symb, p_sum_pdf);
00618 
00619   return r;
00620 }
00621 
00622 
00623 double vlc_node_variance (const vlc_t * vlc, int node, vec pdf, vec symb)
00624 {
00625   double sum_pdf = 0;
00626   double v =
00627     vlc_node_variance_tmp (vlc, node, symb[node], pdf, symb, &sum_pdf);
00628 
00629   if (sum_pdf > 0)
00630     return v / sum_pdf;
00631   else
00632     return 0;
00633 }
00634 
00635 
00636 vec vlc_nodes_variance (const vlc_t * vlc, vec pdf, vec symb)
00637 {
00638   vec  E = vlc_nodes_expectation (vlc, pdf, symb);
00639   int  i;
00640   vec  variances = vec_new_zeros (vlc->nb_nodes);
00641   for (i = vlc->nb_symb; i < vlc->nb_nodes; i++)
00642     variances[i] = vlc_node_variance (vlc, i, pdf, E);
00643 
00644   vec_delete (E);
00645   return variances;
00646 }
00647 
00648 
00649 /*------------------------------------------------------------------*/
00650 /* Expectation of the decrease of MSE corresponding to internal nodes */
00651 vec vlc_nodes_delta_energy (const vlc_t * vlc, vec pdf, vec symb)
00652 {
00653   int  i, child0, child1;
00654   double v0, v1, p0, p1;
00655   vec  P = vlc_nodes_pdf (vlc, pdf);
00656   vec  V = vlc_nodes_variance (vlc, pdf, symb);
00657   vec  D = vec_new_zeros (vlc->nb_nodes);
00658 
00659   for (i = vlc->nb_symb; i < vlc->nb_nodes; i++) {
00660     child0 = vlc_get_child0 (vlc, i);
00661     child1 = vlc_get_child1 (vlc, i);
00662 
00663     if (child0 == DUMM_NODE) {
00664       v0 = 0;
00665       p0 = 0;
00666     }
00667     else {
00668       v0 = V[child0];
00669       p0 = P[child0];
00670     }
00671 
00672     if (child1 == DUMM_NODE) {
00673       v1 = 0;
00674       p1 = 0;
00675     }
00676     else {
00677       v1 = V[child1];
00678       p1 = P[child1];
00679     }
00680     D[i] = V[i] - (p0 * v0 + p1 * v1) / P[i];
00681   }
00682 
00683   vec_delete (P);
00684   vec_delete (V);
00685   return D;
00686 }
00687 
00688 
00689 /*------------------------------------------------------------------*/
00690 void vlc_print (const vlc_t * vlc)
00691 {
00692   int  i, c;
00693   assert (vlc);
00694 
00695   printf ("{");
00696   for (i = 0; i < vlc->nb_symb; i++) {
00697     int  code;
00698 
00699     if (i > 0)
00700       printf (" ");
00701 
00702     code = vlc_get_cwd (vlc, i);
00703 
00704     if (vlc->cwd_length[i] == 0)  /* Should not occur every day... */
00705       printf ("/");
00706     else
00707       for (c = 0; c < vlc->cwd_length[i]; c++) {
00708   char bit = (((code % 2) > 0) ? 1 : 0);
00709   code >>= 1;
00710   printf ("%d", bit);
00711       }
00712   }
00713   printf ("}");
00714 }
00715 
00716 
00717 /*------------------------------------------------------------------*/
00718 vlc_t *vlc_read (const char *svlc)
00719 {
00720   int  node, symb, bit, child_node;
00721   char *s_start = strpbrk (svlc, "{");
00722   char *s_end = strpbrk (svlc, "}");
00723   char *s;
00724   vlc_t *vlc = (vlc_t *) malloc (sizeof (vlc_t));
00725 
00726   assert (vlc);
00727   if (s_start == NULL || s_end == NULL || s_start >= s_end)
00728     it_error ("String " "%s" " does not represent a valid vlc\n", s_start);
00729 
00730   s = strpbrk (s_start, "01");
00731   if (s == NULL)
00732     it_error ("String " "%s" " does not represent a valid vlc\n", s_start);
00733 
00734   /* First count the number of symbols */
00735   vlc->nb_symb = 0;
00736 
00737   while (s < s_end && s != NULL) {
00738     s = strpbrk (s, " ,}");
00739     s = strpbrk (s, "01");
00740     vlc->nb_symb++;
00741   }
00742 
00743   /* By default, only the root node is allocated */
00744   vlc->nb_nodes = vlc->nb_symb + 1;
00745   vlc->node_root = vlc->nb_symb;
00746 
00747   vlc->cwd_length = ivec_new_alloc (vlc->nb_symb + 1, vlc->nb_symb + 1);
00748   vlc->cwd = ivec_new_alloc (vlc->nb_symb + 1, vlc->nb_symb + 1);
00749   vlc->map[0] = ivec_new_alloc (vlc->nb_symb + 1, vlc->nb_symb + 1);
00750   vlc->map[1] = ivec_new_alloc (vlc->nb_symb + 1, vlc->nb_symb + 1);
00751 
00752   vlc->cwd_length[vlc->node_root] = 0;
00753   vlc->cwd[vlc->node_root] = 0;
00754   vlc->map[0][vlc->node_root] = DUMM_NODE;
00755   vlc->map[1][vlc->node_root] = DUMM_NODE;
00756 
00757   symb = 0;
00758   s = strpbrk (s_start, "01");
00759   while (s < s_end && s != NULL) {
00760 
00761     node = vlc->node_root;
00762 
00763     while (*s == '0' || *s == '1') {
00764       bit = (int) (*s - '0');
00765 
00766       /* if node does not exists => allocated a new node */
00767       if (vlc->map[bit][node] == DUMM_NODE) {
00768   if (s[1] == '0' || s[1] == '1') { /* Internal node is added */
00769     child_node = ivec_length (vlc->cwd);
00770     ivec_push (vlc->cwd,
00771          vlc->cwd[node] + (bit << vlc->cwd_length[node]));
00772     ivec_push (vlc->cwd_length, vlc->cwd_length[node] + 1);
00773     ivec_push (vlc->map[0], DUMM_NODE);
00774     ivec_push (vlc->map[1], DUMM_NODE);
00775     vlc->map[bit][node] = child_node;
00776     vlc->nb_nodes++;
00777   }
00778   else {      /* Symbol */
00779     child_node = symb++;
00780     vlc->cwd[child_node] = vlc->cwd[node]
00781       + (bit << vlc->cwd_length[node]);
00782     vlc->cwd_length[child_node] = vlc->cwd_length[node] + 1;
00783     vlc->map[bit][node] = child_node;
00784   }
00785       }
00786 
00787       node = vlc->map[bit][node]; /* Go tho child node */
00788       s++;
00789     }
00790     s = strpbrk (s, "01");
00791   }
00792 
00793   vlc_affect_codewords (vlc);
00794 
00795   return vlc;
00796 }
00797 
00798 
00799 /*------------------------------------------------------------------*/
00800 ivec vlc_energy_order (const vlc_t * vlc, vec pdf, vec symb)
00801 {
00802   ivec NO = ivec_new_alloc (0, vlc->nb_nodes - vlc->nb_symb); /* Node order */
00803   ivec NI = ivec_new_alloc (0, vlc->nb_nodes - vlc->nb_symb); /* Temporary */
00804   vec  RDNI, HNI;
00805 
00806   vec  D = vlc_nodes_delta_energy (vlc, pdf, symb);
00807   vec  H = vlc_nodes_entropy (vlc, pdf);
00808 
00809   int  n, node, child0, child1; /* The node with the highest energy and its child */
00810 
00811   ivec_push (NI, vlc->node_root);
00812 
00813   while (ivec_length (NI) > 0) {
00814     RDNI = vec_index_by (D, NI);
00815     HNI = vec_index_by (H, NI);
00816     vec_div (RDNI, HNI);
00817 
00818     n = vec_max_index (RDNI);
00819     node = NI[n];
00820 
00821     child0 = vlc_get_child0 (vlc, node);
00822     child1 = vlc_get_child1 (vlc, node);
00823 
00824     ivec_push (NO, node);
00825     ivec_del (NI, n);
00826 
00827     if (child0 >= vlc->nb_symb)
00828       ivec_push (NI, child0);
00829 
00830     if (child1 >= vlc->nb_symb)
00831       ivec_push (NI, child1);
00832 
00833     vec_delete (RDNI);
00834     vec_delete (HNI);
00835   }
00836 
00837   ivec_delete (NI);
00838   vec_delete (D);
00839   vec_delete (H);
00840 
00841   return NO;
00842 }
00843 
00844 
00845 /*------------------------------------------------------------------*/
00846 void vlc_print_all (const vlc_t * vlc, vec pdf, vec symb)
00847 {
00848   int  i, c;
00849   vec  P = vlc_nodes_pdf (vlc, pdf);
00850   vec  E = vlc_nodes_expectation (vlc, pdf, symb);
00851   vec  V = vlc_nodes_variance (vlc, pdf, symb);
00852   vec  H = vlc_nodes_entropy (vlc, pdf);
00853   vec  D = vlc_nodes_delta_energy (vlc, pdf, symb);
00854 
00855   for (i = 0; i < vlc->nb_nodes; i++) {
00856     int  code;
00857     if (vlc_is_leaf (vlc, i))
00858       printf ("Symbol %d\t: %.3f\t%.7f \t\t\t\t\t\t", i, E[i], P[i]);
00859     else
00860       printf ("Node %d\t: %.3f\t%.7f\t%.5f\t%.3f\t%.4f\t%.3f\t",
00861         i, E[i], P[i], H[i], V[i], D[i], D[i] / H[i]);
00862 
00863     code = vlc_get_cwd (vlc, i);
00864 
00865     if (vlc->cwd_length[i] == 0)
00866       printf ("/");
00867     else
00868       for (c = 0; c < vlc->cwd_length[i]; c++) {
00869   char bit = (((code % 2) > 0) ? 1 : 0);
00870   code >>= 1;
00871   printf ("%d", bit);
00872       }
00873     printf ("\n");
00874   }
00875 
00876   vec_delete (E);
00877   vec_delete (V);
00878   vec_delete (P);
00879   vec_delete (H);
00880   vec_delete (D);
00881 }
00882 
00883 
00884 /*------------------------------------------------------------------------------*/
00885 int vlc_nb_bits_required (const vlc_t * vlc, ivec S)
00886 {
00887   idx_t i;
00888   int  n = 0;
00889   for (i = 0; i < ivec_length (S); i++)
00890     n += vlc_get_cwd_length (vlc, S[i]);
00891   return n;
00892 }
00893 
00894 
00895 /*------------------------------------------------------------------------------*/
00896 bvec vlc_encode_concat (const vlc_t * vlc, ivec S)
00897 {
00898   idx_t t;      /* symbol clock */
00899   int  c, cwd;      /* codeword clock, codeword  */
00900   bvec E = bvec_new_alloc (0, ivec_length (S) * vlc_maxh (vlc));
00901 
00902   for (t = 0; t < ivec_length (S); t++) {
00903     cwd = vlc_get_cwd (vlc, S[t]);
00904 
00905     for (c = 0; c < vlc_get_cwd_length (vlc, S[t]); c++) {
00906       bvec_push (E, (char) (cwd & 1));
00907       cwd >>= 1;
00908     }
00909   }
00910   return E;
00911 }
00912 
00913 
00914 /*------------------------------------------------------------------------------*/
00915 ivec vlc_decode_concat (const vlc_t * vlc, bvec E)
00916 {
00917   idx_t b;      /* bit clock */
00918   ivec D = ivec_new_alloc (0, bvec_length (E) / vlc_maxh (vlc));
00919   int  n = vlc->node_root;  /* node */
00920 
00921   for (b = 0; b < bvec_length (E); b++) {
00922     n = vlc->map[E[b]][n];
00923 
00924     if (vlc_is_leaf (vlc, n)) { /* The symbol is decoded */
00925       ivec_push (D, n);
00926       n = vlc->node_root;
00927     }
00928   }
00929   return D;
00930 }
00931 
00932 
00933 ivec vlc_decode_concat_N (const vlc_t * vlc, bvec E, idx_t N)
00934 {
00935   idx_t b, t = 0;   /* bit and symbol clocks */
00936   ivec D = ivec_new (N);
00937   int  n = vlc->node_root;  /* node */
00938   ivec_set (D, vlc->node_root);
00939 
00940   for (b = 0; b < bvec_length (E) && t < N; b++) {
00941     n = vlc->map[E[b]][n];
00942 
00943     if (vlc_is_leaf (vlc, n)) { /* The symbol is decoded */
00944       D[t++] = n;
00945       n = vlc->node_root;
00946     }
00947   }
00948   return D;
00949 }
00950 

Hosted by
Copyright (C) 2005-2006 Hervé Jégou
Vivien Chappelier
Francois Cayre
libit logo courtesy of Jonathan Delhumeau