00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include <it/types.h>
00026 #include <it/distance.h>
00027 #include <it/source_func.h>
00028 #include <math.h>
00029
00030
00031 int vec_distance_hamming (vec v1, vec v2)
00032 {
00033 int lmin, lmax, d = 0;
00034 idx_t i;
00035
00036 if (vec_length (v1) > vec_length (v2)) {
00037 lmin = vec_length (v2);
00038 lmax = vec_length (v1);
00039 }
00040 else {
00041 lmin = vec_length (v1);
00042 lmax = vec_length (v2);
00043 }
00044 for (i = 0; i < lmin; i++)
00045 if (v1[i] != v2[i])
00046 d++;
00047 return d + lmax - lmin;
00048 }
00049
00050
00051 int ivec_distance_hamming (ivec v1, ivec v2)
00052 {
00053 int lmin, lmax, d = 0;
00054 idx_t i;
00055
00056 if (ivec_length (v1) > ivec_length (v2)) {
00057 lmin = ivec_length (v2);
00058 lmax = ivec_length (v1);
00059 }
00060 else {
00061 lmin = ivec_length (v1);
00062 lmax = ivec_length (v2);
00063 }
00064 for (i = 0; i < lmin; i++)
00065 if (v1[i] != v2[i])
00066 d++;
00067 return d + lmax - lmin;
00068 }
00069
00070
00071 int bvec_distance_hamming (bvec v1, bvec v2)
00072 {
00073 int lmin, lmax, d = 0;
00074 idx_t i;
00075
00076 if (bvec_length (v1) > bvec_length (v2)) {
00077 lmin = bvec_length (v2);
00078 lmax = bvec_length (v1);
00079 }
00080 else {
00081 lmin = bvec_length (v1);
00082 lmax = bvec_length (v2);
00083 }
00084 for (i = 0; i < lmin; i++)
00085 if (v1[i] != v2[i])
00086 d++;
00087 return d + lmax - lmin;
00088 }
00089
00090
00091 int cvec_distance_hamming (cvec v1, cvec v2)
00092 {
00093 int lmin, lmax, d = 0;
00094 idx_t i;
00095
00096 if (cvec_length (v1) > cvec_length (v2)) {
00097 lmin = cvec_length (v2);
00098 lmax = cvec_length (v1);
00099 }
00100 else {
00101 lmin = cvec_length (v1);
00102 lmax = cvec_length (v2);
00103 }
00104 for (i = 0; i < lmin; i++)
00105 if (!ceq (v1[i], v2[i]))
00106 d++;
00107 return d + lmax - lmin;
00108 }
00109
00110
00111
00112 double vec_ser (vec v1, vec v2)
00113 {
00114 int lmin, lmax, d = 0;
00115 idx_t i;
00116
00117 if (vec_length (v1) > vec_length (v2)) {
00118 lmin = vec_length (v2);
00119 lmax = vec_length (v1);
00120 }
00121 else {
00122 lmin = vec_length (v1);
00123 lmax = vec_length (v2);
00124 }
00125 for (i = 0; i < lmin; i++)
00126 if (v1[i] != v2[i])
00127 d++;
00128
00129 return (d + lmax - vec_length (v2)) / (double) vec_length (v1);
00130 }
00131
00132
00133 double ivec_ser (ivec v1, ivec v2)
00134 {
00135 int lmin, lmax, d = 0;
00136 idx_t i;
00137
00138 if (ivec_length (v1) > ivec_length (v2)) {
00139 lmin = ivec_length (v2);
00140 lmax = ivec_length (v1);
00141 }
00142 else {
00143 lmin = ivec_length (v1);
00144 lmax = ivec_length (v2);
00145 }
00146 for (i = 0; i < lmin; i++)
00147 if (v1[i] != v2[i])
00148 d++;
00149
00150 return (d + lmax - ivec_length (v2)) / (double) ivec_length (v1);
00151 }
00152
00153
00154 double bvec_ber (bvec v1, bvec v2)
00155 {
00156 int lmin, lmax, d = 0;
00157 idx_t i;
00158
00159 if (bvec_length (v1) > bvec_length (v2)) {
00160 lmin = bvec_length (v2);
00161 lmax = bvec_length (v1);
00162 }
00163 else {
00164 lmin = bvec_length (v1);
00165 lmax = bvec_length (v2);
00166 }
00167 for (i = 0; i < lmin; i++)
00168 if (v1[i] != v2[i])
00169 d++;
00170
00171 return (d + lmax - bvec_length (v2)) / (double) bvec_length (v1);
00172 }
00173
00174
00175
00176
00177 int ivec_distance_levenshtein (ivec v1, ivec v2, int cost_ins, int cost_del,
00178 int cost_sub)
00179 {
00180
00181 idx_t i1, i2;
00182 int d;
00183
00184 imat dislev = imat_new (ivec_length (v1) + 1, ivec_length (v2) + 1);
00185 imat_set (dislev, 2 * ivec_length (v1));
00186
00187
00188 for (i1 = 0; i1 < ivec_length (v1) + 1; i1++)
00189 dislev[i1][0] = i1;
00190
00191 for (i2 = 0; i2 < ivec_length (v2) + 1; i2++)
00192 dislev[0][i2] = i2;
00193
00194 for (i1 = 0; i1 < ivec_length (v1); i1++)
00195 for (i2 = 0; i2 < ivec_length (v2); i2++) {
00196
00197 if (v1[i1] == v2[i2])
00198 dislev[i1 + 1][i2 + 1] = dislev[i1][i2];
00199 else
00200 dislev[i1 + 1][i2 + 1] = dislev[i1][i2] + cost_sub;
00201
00202
00203 if (dislev[i1 + 1][i2] + cost_del < dislev[i1 + 1][i2 + 1])
00204 dislev[i1 + 1][i2 + 1] = dislev[i1 + 1][i2] + cost_del;
00205
00206 if (dislev[i1][i2 + 1] + cost_ins < dislev[i1 + 1][i2 + 1])
00207 dislev[i1 + 1][i2 + 1] = dislev[i1][i2 + 1] + cost_ins;
00208 }
00209
00210 d = dislev[ivec_length (v1)][ivec_length (v2)];
00211 imat_delete (dislev);
00212 return d;
00213 }
00214
00215
00216 double vec_distance_norm (vec v1, vec v2, double norm)
00217 {
00218 double s = 0;
00219 idx_t i;
00220 assert (vec_length (v1) == vec_length (v2));
00221
00222 for (i = 0; i < vec_length (v1); i++)
00223 s += pow (fabs (v1[i] - v2[i]), norm);
00224
00225 return pow (s, 1. / (double) norm);
00226 }
00227
00228
00229 double mat_distance_norm (mat m1, mat m2, double norm)
00230 {
00231 double s = 0;
00232 idx_t i, j;
00233 assert (mat_width (m1) == mat_width (m2));
00234 assert (mat_height (m1) == mat_height (m2));
00235
00236 for (i = 0; i < mat_height (m1); i++)
00237 for (j = 0; j < mat_width (m1); j++)
00238 s += pow (fabs (m1[i][j] - m2[i][j]), norm);
00239
00240 return pow (s, 1. / (double) norm);
00241 }
00242
00243
00244 int ivec_distance_norm1 (ivec v1, ivec v2)
00245 {
00246 int s = 0;
00247 idx_t i;
00248 assert (ivec_length (v1) == ivec_length (v2));
00249
00250 for (i = 0; i < ivec_length (v1); i++)
00251 s += abs (v1[i] - v2[i]);
00252
00253 return s;
00254 }
00255
00256
00257
00258 double vec_distance_mse (vec v1, vec v2, double rec_value)
00259 {
00260 double s = 0, d;
00261 idx_t i;
00262 vec vmin, vmax;
00263
00264 if (vec_length (v1) > vec_length (v2)) {
00265 vmin = v2;
00266 vmax = v1;
00267 }
00268 else {
00269 vmin = v1;
00270 vmax = v2;
00271 }
00272
00273 for (i = 0; i < vec_length (vmin); i++) {
00274 d = vmax[i] - vmin[i];
00275 s += d * d;
00276 }
00277
00278 for (; i < vec_length (vmax); i++) {
00279 d = vmax[i] - rec_value;
00280 s += d * d;
00281 }
00282 return s / vec_length (vmax);
00283 }
00284
00285
00286
00287 double mat_distance_mse (mat m1, mat m2, double rec_value)
00288 {
00289 double s = 0;
00290 idx_t i;
00291 mat mmin, mmax;
00292
00293 if (mat_height (m1) > mat_height (m2)) {
00294 mmin = m2;
00295 mmax = m1;
00296 }
00297 else {
00298 mmin = m1;
00299 mmax = m2;
00300 }
00301
00302 for (i = 0; i < mat_height (mmin); i++)
00303 s += vec_distance_mse (mmax[i], mmin[i], rec_value);
00304
00305 for (; i < mat_height (mmax); i++)
00306 s += vec_distance_mse (mmax[i], vec_null, rec_value);
00307
00308 return s / mat_height (mmax);
00309 }
00310
00311 double ivec_distance_mse (ivec v1, ivec v2, double rec_value)
00312 {
00313 double s = 0, d;
00314 idx_t i;
00315 ivec vmin, vmax;
00316
00317 if (ivec_length (v1) > ivec_length (v2)) {
00318 vmin = v2;
00319 vmax = v1;
00320 }
00321 else {
00322 vmin = v1;
00323 vmax = v2;
00324 }
00325
00326 for (i = 0; i < ivec_length (vmin); i++) {
00327 d = vmax[i] - vmin[i];
00328 s += d * d;
00329 }
00330
00331 for (; i < ivec_length (vmax); i++) {
00332 d = vmax[i] - rec_value;
00333 s += d * d;
00334 }
00335 return s / (double) ivec_length (vmax);
00336 }
00337
00338 double imat_distance_mse (imat m1, imat m2, double rec_value)
00339 {
00340 double s = 0;
00341 idx_t i;
00342 imat mmin, mmax;
00343
00344 if (imat_height (m1) > imat_height (m2)) {
00345 mmin = m2;
00346 mmax = m1;
00347 }
00348 else {
00349 mmin = m1;
00350 mmax = m2;
00351 }
00352
00353 for (i = 0; i < imat_height (mmin); i++)
00354 s += ivec_distance_mse (mmax[i], mmin[i], rec_value);
00355
00356 for (; i < imat_height (mmax); i++)
00357 s += ivec_distance_mse (mmax[i], ivec_null, rec_value);
00358
00359 return s / imat_height (mmax);
00360 }
00361
00362
00363 long ivec_distance_sqr (ivec v1, ivec v2)
00364 {
00365 assert (ivec_length (v1) == ivec_length (v2));
00366
00367 long s = 0, d;
00368 idx_t i;
00369
00370 for (i = 0; i < ivec_length (v1); i++) {
00371 d = v2[i] - v1[i];
00372 s += d * (long) d;
00373 }
00374
00375 return s;
00376 }
00377
00378
00379
00380
00381 double vec_distance_kullback_leibler (vec pdf1, vec pdf2)
00382 {
00383 idx_t i;
00384 double d = 0;
00385 assert (vec_length (pdf1) == vec_length (pdf2));
00386 assert (is_valid_pdf (pdf1, 1e-10) && is_valid_pdf (pdf2, 1e-10));
00387
00388
00389 for (i = 0; i < vec_length (pdf1); i++) {
00390 if (pdf1[i] != 0) {
00391 if (pdf2[i] == 0)
00392 return INT_MAX;
00393 else
00394 d += pdf1[i] * log (pdf1[i] / pdf2[i]);
00395 }
00396 }
00397 return d / log (2);
00398 }
00399
00400
00401
00402
00403
00404
00405 mat compute_distance_matrix (mat v, double nr)
00406 {
00407 int n = mat_height (v), i, j;
00408
00409 mat dis = mat_new (n, n);
00410
00411 for (i = 0 ; i < n ; i++)
00412 for (j = 0 ; j < n ; j++) {
00413 dis[i][j] = vec_distance_norm (v[i], v[j], nr);
00414 }
00415
00416 return dis;
00417 }
00418
00419
00420
00421
00422
00423
00424 double spvec_distance_norm1 (ivec svi1, vec sv1, ivec svi2, vec sv2)
00425 {
00426 double s = 0;
00427 idx_t i1 = 0, i2 = 0;
00428
00429 assert (ivec_length (svi1) == vec_length (sv1));
00430 assert (ivec_length (svi2) == vec_length (sv2));
00431
00432 while (1) {
00433
00434 if (i1 == ivec_length (svi1)) {
00435 while (i2 < ivec_length (svi2))
00436 s += fabs (sv2[i2++]);
00437 break;
00438 }
00439
00440 if (i2 == ivec_length (svi2)) {
00441 while (i1 < ivec_length (svi1))
00442 s += fabs (sv1[i1++]);
00443 break;
00444 }
00445
00446 if (svi1[i1] == svi2[i2]) {
00447 s += fabs (sv1[i1] - sv2[i2]);
00448 i1++;
00449 i2++;
00450 }
00451
00452 else {
00453 if (svi1[i1] < svi2[i2])
00454 s += fabs (sv1[i1++]);
00455 else
00456 s += fabs (sv2[i2++]);
00457 }
00458 }
00459
00460 return s;
00461 }
00462
00463
00464 int spivec_distance_norm1 (ivec svi1, ivec sv1, ivec svi2, ivec sv2)
00465 {
00466 int s = 0;
00467 idx_t i1 = 0, i2 = 0;
00468
00469 assert (ivec_length (svi1) == ivec_length (sv1));
00470 assert (ivec_length (svi2) == ivec_length (sv2));
00471
00472 while (1) {
00473
00474 if (i1 == ivec_length (svi1)) {
00475 while (i2 < ivec_length (svi2))
00476 s += abs (sv2[i2++]);
00477 break;
00478 }
00479
00480 if (i2 == ivec_length (svi2)) {
00481 while (i1 < ivec_length (svi1))
00482 s += abs (sv1[i1++]);
00483 break;
00484 }
00485
00486 if (svi1[i1] == svi2[i2]) {
00487 s += abs (sv1[i1] - sv2[i2]);
00488 i1++;
00489 i2++;
00490 }
00491
00492 else {
00493 if (svi1[i1] < svi2[i2])
00494 s += abs (sv1[i1++]);
00495 else
00496 s += abs (sv2[i2++]);
00497 }
00498 }
00499
00500 return s;
00501 }
00502
00503
00504
00505 double spvec_distance_sqr (ivec svi1, vec sv1, ivec svi2, vec sv2)
00506 {
00507 double s = 0;
00508 idx_t i1 = 0, i2 = 0;
00509
00510 assert (ivec_length (svi1) == vec_length (sv1));
00511 assert (ivec_length (svi2) == vec_length (sv2));
00512
00513 while (1) {
00514
00515 if (i1 == ivec_length (svi1)) {
00516 while (i2 < ivec_length (svi2)) {
00517 s += sv2[i2] * sv2[i2];
00518 i2++;
00519 }
00520 break;
00521 }
00522
00523 if (i2 == ivec_length (svi2)) {
00524 while (i1 < ivec_length (svi1)) {
00525 s += sv1[i1] * sv1[i1];
00526 i1++;
00527 }
00528 break;
00529 }
00530
00531 if (svi1[i1] == svi2[i2]) {
00532 s += (sv1[i1] - sv2[i2]) * (sv1[i1] - sv2[i2]);
00533 i1++;
00534 i2++;
00535 }
00536
00537 else {
00538 if (svi1[i1] < svi2[i2]) {
00539 s += sv1[i1] * sv1[i1];
00540 i1++;
00541 }
00542 else {
00543 s += sv2[i2] * sv2[i2];
00544 i2++;
00545 }
00546 }
00547 }
00548
00549 return s;
00550 }
00551
00552
00553 int spivec_distance_sqr (ivec svi1, ivec sv1, ivec svi2, ivec sv2)
00554 {
00555 double s = 0;
00556 idx_t i1 = 0, i2 = 0;
00557
00558 assert (ivec_length (svi1) == ivec_length (sv1));
00559 assert (ivec_length (svi2) == ivec_length (sv2));
00560
00561 while (1) {
00562
00563 if (i1 == ivec_length (svi1)) {
00564 while (i2 < ivec_length (svi2)) {
00565 s += sv2[i2] * sv2[i2];
00566 i2++;
00567 }
00568 break;
00569 }
00570
00571 if (i2 == ivec_length (svi2)) {
00572 while (i1 < ivec_length (svi1)) {
00573 s += sv1[i1] * sv1[i1];
00574 i1++;
00575 }
00576 break;
00577 }
00578
00579 if (svi1[i1] == svi2[i2]) {
00580 s += (sv1[i1] - sv2[i2]) * (sv1[i1] - sv2[i2]);
00581 i1++;
00582 i2++;
00583 }
00584
00585 else {
00586 if (svi1[i1] < svi2[i2]) {
00587 s += sv1[i1] * sv1[i1];
00588 i1++;
00589 }
00590 else {
00591 s += sv2[i2] * sv2[i2];
00592 i2++;
00593 }
00594 }
00595 }
00596
00597 return s;
00598 }
00599
00600
00601 double spvec_distance_norm2 (ivec svi1, vec sv1, ivec svi2, vec sv2)
00602 {
00603 return sqrt (spvec_distance_sqr (svi1, sv1, svi2, sv2));
00604 }