src/distance.c

Go to the documentation of this file.
00001 /*
00002    libit - Library for basic source and channel coding functions
00003    Copyright (C) 2005-2005 Vivien Chappelier, Herve Jegou
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public
00016    License along with this library; if not, write to the Free
00017    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018 */
00019 
00020 /*
00021   Distance measures
00022   Copyright (C) 2005 Vivien Chappelier, Herve Jegou
00023 */
00024 
00025 #include <it/types.h>
00026 #include <it/distance.h>
00027 #include <it/source_func.h>
00028 #include <math.h>
00029 
00030 /*---------------------------------------------------------------------------*/
00031 int vec_distance_hamming (vec v1, vec v2)
00032 {
00033   int  lmin, lmax, d = 0;
00034   idx_t i;
00035 
00036   if (vec_length (v1) > vec_length (v2)) {
00037     lmin = vec_length (v2);
00038     lmax = vec_length (v1);
00039   }
00040   else {
00041     lmin = vec_length (v1);
00042     lmax = vec_length (v2);
00043   }
00044   for (i = 0; i < lmin; i++)
00045     if (v1[i] != v2[i])
00046       d++;
00047   return d + lmax - lmin;
00048 }
00049 
00050 
00051 int ivec_distance_hamming (ivec v1, ivec v2)
00052 {
00053   int  lmin, lmax, d = 0;
00054   idx_t i;
00055 
00056   if (ivec_length (v1) > ivec_length (v2)) {
00057     lmin = ivec_length (v2);
00058     lmax = ivec_length (v1);
00059   }
00060   else {
00061     lmin = ivec_length (v1);
00062     lmax = ivec_length (v2);
00063   }
00064   for (i = 0; i < lmin; i++)
00065     if (v1[i] != v2[i])
00066       d++;
00067   return d + lmax - lmin;
00068 }
00069 
00070 
00071 int bvec_distance_hamming (bvec v1, bvec v2)
00072 {
00073   int  lmin, lmax, d = 0;
00074   idx_t i;
00075 
00076   if (bvec_length (v1) > bvec_length (v2)) {
00077     lmin = bvec_length (v2);
00078     lmax = bvec_length (v1);
00079   }
00080   else {
00081     lmin = bvec_length (v1);
00082     lmax = bvec_length (v2);
00083   }
00084   for (i = 0; i < lmin; i++)
00085     if (v1[i] != v2[i])
00086       d++;
00087   return d + lmax - lmin;
00088 }
00089 
00090 
00091 int cvec_distance_hamming (cvec v1, cvec v2)
00092 {
00093   int  lmin, lmax, d = 0;
00094   idx_t i;
00095 
00096   if (cvec_length (v1) > cvec_length (v2)) {
00097     lmin = cvec_length (v2);
00098     lmax = cvec_length (v1);
00099   }
00100   else {
00101     lmin = cvec_length (v1);
00102     lmax = cvec_length (v2);
00103   }
00104   for (i = 0; i < lmin; i++)
00105     if (!ceq (v1[i], v2[i]))
00106       d++;
00107   return d + lmax - lmin;
00108 }
00109 
00110 
00111 /*-----------------------------------------------------------------------*/
00112 double vec_ser (vec v1, vec v2)
00113 {
00114   int  lmin, lmax, d = 0;
00115   idx_t i;
00116 
00117   if (vec_length (v1) > vec_length (v2)) {
00118     lmin = vec_length (v2);
00119     lmax = vec_length (v1);
00120   }
00121   else {
00122     lmin = vec_length (v1);
00123     lmax = vec_length (v2);
00124   }
00125   for (i = 0; i < lmin; i++)
00126     if (v1[i] != v2[i])
00127       d++;
00128 
00129   return (d + lmax - vec_length (v2)) / (double) vec_length (v1);
00130 }
00131 
00132 
00133 double ivec_ser (ivec v1, ivec v2)
00134 {
00135   int  lmin, lmax, d = 0;
00136   idx_t i;
00137 
00138   if (ivec_length (v1) > ivec_length (v2)) {
00139     lmin = ivec_length (v2);
00140     lmax = ivec_length (v1);
00141   }
00142   else {
00143     lmin = ivec_length (v1);
00144     lmax = ivec_length (v2);
00145   }
00146   for (i = 0; i < lmin; i++)
00147     if (v1[i] != v2[i])
00148       d++;
00149 
00150   return (d + lmax - ivec_length (v2)) / (double) ivec_length (v1);
00151 }
00152 
00153 
00154 double bvec_ber (bvec v1, bvec v2)
00155 {
00156   int  lmin, lmax, d = 0;
00157   idx_t i;
00158 
00159   if (bvec_length (v1) > bvec_length (v2)) {
00160     lmin = bvec_length (v2);
00161     lmax = bvec_length (v1);
00162   }
00163   else {
00164     lmin = bvec_length (v1);
00165     lmax = bvec_length (v2);
00166   }
00167   for (i = 0; i < lmin; i++)
00168     if (v1[i] != v2[i])
00169       d++;
00170 
00171   return (d + lmax - bvec_length (v2)) / (double) bvec_length (v1);
00172 }
00173 
00174 
00175 
00176 /*-----------------------------------------------------------------------*/
00177 int ivec_distance_levenshtein (ivec v1, ivec v2, int cost_ins, int cost_del,
00178              int cost_sub)
00179 {
00180   /* i1 and i2 respectively index the positions in vectors v1 and v2 */
00181   idx_t i1, i2;
00182   int  d;
00183 
00184   imat dislev = imat_new (ivec_length (v1) + 1, ivec_length (v2) + 1);
00185   imat_set (dislev, 2 * ivec_length (v1));
00186 
00187   /* Initializations */
00188   for (i1 = 0; i1 < ivec_length (v1) + 1; i1++)
00189     dislev[i1][0] = i1;
00190 
00191   for (i2 = 0; i2 < ivec_length (v2) + 1; i2++)
00192     dislev[0][i2] = i2;
00193 
00194   for (i1 = 0; i1 < ivec_length (v1); i1++)
00195     for (i2 = 0; i2 < ivec_length (v2); i2++) {
00196       /* First try the substitution */
00197       if (v1[i1] == v2[i2])
00198   dislev[i1 + 1][i2 + 1] = dislev[i1][i2];
00199       else
00200   dislev[i1 + 1][i2 + 1] = dislev[i1][i2] + cost_sub;
00201 
00202       /* Insertion and deletion costs */
00203       if (dislev[i1 + 1][i2] + cost_del < dislev[i1 + 1][i2 + 1])
00204   dislev[i1 + 1][i2 + 1] = dislev[i1 + 1][i2] + cost_del;
00205 
00206       if (dislev[i1][i2 + 1] + cost_ins < dislev[i1 + 1][i2 + 1])
00207   dislev[i1 + 1][i2 + 1] = dislev[i1][i2 + 1] + cost_ins;
00208     }
00209 
00210   d = dislev[ivec_length (v1)][ivec_length (v2)];
00211   imat_delete (dislev);
00212   return d;
00213 }
00214 
00215 /*-----------------------------------------------------------------------*/
00216 double vec_distance_norm (vec v1, vec v2, double norm)
00217 {
00218   double s = 0;
00219   idx_t i;
00220   assert (vec_length (v1) == vec_length (v2));
00221 
00222   for (i = 0; i < vec_length (v1); i++)
00223     s += pow (fabs (v1[i] - v2[i]), norm);
00224 
00225   return pow (s, 1. / (double) norm);
00226 }
00227 
00228 
00229 double mat_distance_norm (mat m1, mat m2, double norm)
00230 {
00231   double s = 0;
00232   idx_t i, j;
00233   assert (mat_width (m1) == mat_width (m2));
00234   assert (mat_height (m1) == mat_height (m2));
00235 
00236   for (i = 0; i < mat_height (m1); i++)
00237     for (j = 0; j < mat_width (m1); j++)
00238       s += pow (fabs (m1[i][j] - m2[i][j]), norm);
00239 
00240   return pow (s, 1. / (double) norm);
00241 }
00242 
00243 
00244 int ivec_distance_norm1 (ivec v1, ivec v2)
00245 {
00246   int s = 0;
00247   idx_t i;
00248   assert (ivec_length (v1) == ivec_length (v2));
00249 
00250   for (i = 0; i < ivec_length (v1); i++)
00251     s += abs (v1[i] - v2[i]);
00252 
00253   return s;
00254 }
00255 
00256 
00257 /*-----------------------------------------------------------------------*/
00258 double vec_distance_mse (vec v1, vec v2, double rec_value)
00259 {
00260   double s = 0, d;
00261   idx_t i;
00262   vec  vmin, vmax;    /* vmin is the shortest vector and vmax the longer one */
00263 
00264   if (vec_length (v1) > vec_length (v2)) {
00265     vmin = v2;
00266     vmax = v1;
00267   }
00268   else {
00269     vmin = v1;
00270     vmax = v2;
00271   }
00272 
00273   for (i = 0; i < vec_length (vmin); i++) {
00274     d = vmax[i] - vmin[i];
00275     s += d * d;
00276   }
00277 
00278   for (; i < vec_length (vmax); i++) {
00279     d = vmax[i] - rec_value;
00280     s += d * d;
00281   }
00282   return s / vec_length (vmax);
00283 }
00284 
00285 
00286 /*-----------------------------------------------------------------------*/
00287 double mat_distance_mse (mat m1, mat m2, double rec_value)
00288 {
00289   double s = 0;
00290   idx_t i;
00291   mat  mmin, mmax;    /* mmin is the shortest matrix and mmax the tallest one */
00292 
00293   if (mat_height (m1) > mat_height (m2)) {
00294     mmin = m2;
00295     mmax = m1;
00296   }
00297   else {
00298     mmin = m1;
00299     mmax = m2;
00300   }
00301 
00302   for (i = 0; i < mat_height (mmin); i++)
00303     s += vec_distance_mse (mmax[i], mmin[i], rec_value);
00304 
00305   for (; i < mat_height (mmax); i++)
00306     s += vec_distance_mse (mmax[i], vec_null, rec_value);
00307 
00308   return s / mat_height (mmax);
00309 }
00310 
00311 double ivec_distance_mse (ivec v1, ivec v2, double rec_value)
00312 {
00313   double s = 0, d;
00314   idx_t i;
00315   ivec vmin, vmax;    /* vmin is the shortest vector and vmax the longer one */
00316 
00317   if (ivec_length (v1) > ivec_length (v2)) {
00318     vmin = v2;
00319     vmax = v1;
00320   }
00321   else {
00322     vmin = v1;
00323     vmax = v2;
00324   }
00325 
00326   for (i = 0; i < ivec_length (vmin); i++) {
00327     d = vmax[i] - vmin[i];
00328     s += d * d;
00329   }
00330 
00331   for (; i < ivec_length (vmax); i++) {
00332     d = vmax[i] - rec_value;
00333     s += d * d;
00334   }
00335   return s / (double) ivec_length (vmax);
00336 }
00337 
00338 double imat_distance_mse (imat m1, imat m2, double rec_value)
00339 {
00340   double s = 0;
00341   idx_t i;
00342   imat mmin, mmax;    /* mmin is the shortest matrix and mmax the tallest one */
00343 
00344   if (imat_height (m1) > imat_height (m2)) {
00345     mmin = m2;
00346     mmax = m1;
00347   }
00348   else {
00349     mmin = m1;
00350     mmax = m2;
00351   }
00352 
00353   for (i = 0; i < imat_height (mmin); i++)
00354     s += ivec_distance_mse (mmax[i], mmin[i], rec_value);
00355 
00356   for (; i < imat_height (mmax); i++)
00357     s += ivec_distance_mse (mmax[i], ivec_null, rec_value);
00358 
00359   return s / imat_height (mmax);
00360 }
00361 
00362 
00363 long ivec_distance_sqr (ivec v1, ivec v2)
00364 {
00365   assert (ivec_length (v1) == ivec_length (v2));
00366 
00367   long s = 0, d;
00368   idx_t i;
00369 
00370   for (i = 0; i < ivec_length (v1); i++) {
00371     d = v2[i] - v1[i];
00372     s += d * (long) d;
00373   }
00374 
00375   return s;
00376 }
00377 
00378 
00379 /*-----------------------------------------------------------------------*/
00380 /* Return the Kullback-Leibler pseudo-distance between distribution pdf1 and pdf2. */
00381 double vec_distance_kullback_leibler (vec pdf1, vec pdf2)
00382 {
00383   idx_t i;
00384   double d = 0;
00385   assert (vec_length (pdf1) == vec_length (pdf2));
00386   assert (is_valid_pdf (pdf1, 1e-10) && is_valid_pdf (pdf2, 1e-10));
00387 
00388 
00389   for (i = 0; i < vec_length (pdf1); i++) {
00390     if (pdf1[i] != 0) {
00391       if (pdf2[i] == 0)
00392   return INT_MAX;
00393       else
00394   d += pdf1[i] * log (pdf1[i] / pdf2[i]);
00395     }
00396   }
00397   return d / log (2);
00398 }
00399 
00400 
00401 
00402 
00403 /*-----------------------------------------------------------------------*/
00404 /* compute the matrix of distance associated with a given vector         */
00405 mat compute_distance_matrix (mat v, double nr)
00406 {
00407   int n = mat_height (v), i, j;
00408 
00409   mat dis = mat_new (n, n);
00410 
00411   for (i = 0 ; i < n ; i++)
00412     for (j = 0 ; j < n ; j++) {
00413       dis[i][j] = vec_distance_norm (v[i], v[j], nr);
00414     }
00415 
00416   return dis;
00417 }
00418 
00419 
00420 /*----------------------------------------------------------------*/
00421 /*             Distances between sparse vectors                   */
00422 
00423 /* compute the L1 distance between two sparse vectors */
00424 double spvec_distance_norm1 (ivec svi1, vec sv1, ivec svi2, vec sv2)
00425 {
00426   double s = 0;
00427   idx_t i1 = 0, i2 = 0;
00428 
00429   assert (ivec_length (svi1) == vec_length (sv1));
00430   assert (ivec_length (svi2) == vec_length (sv2));
00431 
00432   while (1) {
00433 
00434     if (i1 == ivec_length (svi1)) {
00435       while (i2 < ivec_length (svi2)) 
00436   s += fabs (sv2[i2++]);
00437       break;
00438     }
00439 
00440     if (i2 == ivec_length (svi2)) {
00441       while (i1 < ivec_length (svi1)) 
00442   s += fabs (sv1[i1++]);
00443       break;
00444     }
00445 
00446     if (svi1[i1] == svi2[i2]) {
00447       s += fabs (sv1[i1] - sv2[i2]);
00448       i1++;
00449       i2++;
00450     }
00451 
00452     else {
00453       if (svi1[i1] < svi2[i2]) 
00454   s += fabs (sv1[i1++]);
00455       else
00456   s += fabs (sv2[i2++]);
00457     }
00458   }
00459 
00460   return s;
00461 }
00462 
00463 
00464 int spivec_distance_norm1 (ivec svi1, ivec sv1, ivec svi2, ivec sv2)
00465 {
00466   int s = 0;
00467   idx_t i1 = 0, i2 = 0;
00468 
00469   assert (ivec_length (svi1) == ivec_length (sv1));
00470   assert (ivec_length (svi2) == ivec_length (sv2));
00471 
00472   while (1) {
00473 
00474     if (i1 == ivec_length (svi1)) {
00475       while (i2 < ivec_length (svi2)) 
00476   s += abs (sv2[i2++]);
00477       break;
00478     }
00479 
00480     if (i2 == ivec_length (svi2)) {
00481       while (i1 < ivec_length (svi1)) 
00482   s += abs (sv1[i1++]);
00483       break;
00484     }
00485 
00486     if (svi1[i1] == svi2[i2]) {
00487       s += abs (sv1[i1] - sv2[i2]);
00488       i1++;
00489       i2++;
00490     }
00491 
00492     else {
00493       if (svi1[i1] < svi2[i2]) 
00494   s += abs (sv1[i1++]);
00495       else
00496   s += abs (sv2[i2++]);
00497     }
00498   }
00499 
00500   return s;
00501 }
00502 
00503 
00504 /* compute the square of the euclidean distance between two sparse vectors */
00505 double spvec_distance_sqr (ivec svi1, vec sv1, ivec svi2, vec sv2)
00506 {
00507   double s = 0;
00508   idx_t i1 = 0, i2 = 0;
00509 
00510   assert (ivec_length (svi1) == vec_length (sv1));
00511   assert (ivec_length (svi2) == vec_length (sv2));
00512 
00513   while (1) {
00514 
00515     if (i1 == ivec_length (svi1)) {
00516       while (i2 < ivec_length (svi2)) {
00517   s += sv2[i2] * sv2[i2];
00518   i2++;
00519       }
00520       break;
00521     }
00522 
00523     if (i2 == ivec_length (svi2)) {
00524       while (i1 < ivec_length (svi1)) {
00525   s += sv1[i1] * sv1[i1];
00526   i1++;
00527       }
00528       break;
00529     }
00530 
00531     if (svi1[i1] == svi2[i2]) {
00532       s += (sv1[i1] - sv2[i2]) * (sv1[i1] - sv2[i2]);
00533       i1++;
00534       i2++;
00535     }
00536 
00537     else {
00538       if (svi1[i1] < svi2[i2]) {
00539   s += sv1[i1] * sv1[i1];
00540   i1++;
00541       }
00542       else {
00543   s += sv2[i2] * sv2[i2];
00544   i2++;
00545       }
00546     }
00547   }
00548 
00549   return s;
00550 }
00551 
00552 
00553 int spivec_distance_sqr (ivec svi1, ivec sv1, ivec svi2, ivec sv2)
00554 {
00555   double s = 0;
00556   idx_t i1 = 0, i2 = 0;
00557 
00558   assert (ivec_length (svi1) == ivec_length (sv1));
00559   assert (ivec_length (svi2) == ivec_length (sv2));
00560 
00561   while (1) {
00562 
00563     if (i1 == ivec_length (svi1)) {
00564       while (i2 < ivec_length (svi2)) {
00565   s += sv2[i2] * sv2[i2];
00566   i2++;
00567       }
00568       break;
00569     }
00570 
00571     if (i2 == ivec_length (svi2)) {
00572       while (i1 < ivec_length (svi1)) {
00573   s += sv1[i1] * sv1[i1];
00574   i1++;
00575       }
00576       break;
00577     }
00578 
00579     if (svi1[i1] == svi2[i2]) {
00580       s += (sv1[i1] - sv2[i2]) * (sv1[i1] - sv2[i2]);
00581       i1++;
00582       i2++;
00583     }
00584 
00585     else {
00586       if (svi1[i1] < svi2[i2]) {
00587   s += sv1[i1] * sv1[i1];
00588   i1++;
00589       }
00590       else {
00591   s += sv2[i2] * sv2[i2];
00592   i2++;
00593       }
00594     }
00595   }
00596 
00597   return s;
00598 }
00599 
00600 
00601 double spvec_distance_norm2 (ivec svi1, vec sv1, ivec svi2, vec sv2)
00602 {
00603   return sqrt (spvec_distance_sqr (svi1, sv1, svi2, sv2));
00604 }

Hosted by
Copyright (C) 2005-2006 Hervé Jégou
Vivien Chappelier
Francois Cayre
libit logo courtesy of Jonathan Delhumeau