# https://rt.cpan.org/Public/Bug/Display.html?id=58572 --- Nilsimsa.pm.orig +++ Nilsimsa.pm @@ -37,6 +37,41 @@ wrapper around nilsimsa implementation i Pass in any text, any size, and get back a digest string composed 64 hex chars. +=item $nilsimsa->hex_distance($n1, $n2); + +Compare two nilsimsa digests n1 and n1 given as hex chars. +The return value is a number in the range -128 .. 127, where any value over 24 +(which is 3 sigma) is believed to indicate that the two texts are not generated +independently. + +=cut +# 773e2df0a02a319ec34a0b71d54029111da90838cbc20ecd3d2d4e18c25a3025 +# 47182cf0802a11dec24a3b75d5042d310ca90838c9d20ecc3d610e98560a3645 +# The nilsimsa of these two codes is 92 on a scale of -128 to +128. That means that +# 36 bits are different and 220 bits the same. Any nilsimsa over 24 (which is 3 +# sigma) indicates that the two messages are probably not independently +# generated. + +sub hex_distance +{ + shift @_ if ref $_[0]; + + # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + my @bit_diff = ( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + + my @n1 = split //, $_[0]; + my @n2 = split //, $_[1]; + die "length differs", if scalar(@n1) != scalar (@n2); + my $val = scalar(@n1) * 4; + for my $i (0..$#n1) + { + next if $n1[$i] eq $n2[$i]; + my ($v1,$v2) = (hex($n1[$i]), hex($n2[$i])); + $val -= $bit_diff[$v1 ^ $v2]; + } + return $val - scalar(@n1)*2; +} + =back =head1 SEE ALSO --- t/01-hex_distance.t.orig +++ t/01-hex_distance.t @@ -0,0 +1,19 @@ +#!perl +use strict; +use warnings; +use Test::More tests => 2; + +use Digest::Nilsimsa; + +my @n = qw(773e2df0a02a319ec34a0b71d54029111da90838cbc20ecd3d2d4e18c25a3025 + 47182cf0802a11dec24a3b75d5042d310ca90838c9d20ecc3d610e98560a3645); + +# from http://ixazon.dynip.com/~cmeclax/nilsimsa.html: +# The nilsimsa of these two codes is 92 on a scale of -128 to +128. That means +# that 36 bits are different and 220 bits the same. Any nilsimsa over 24 (which +# is 3 sigma) indicates that the two messages are probably not independently +# generated. + +cmp_ok(Digest::Nilsimsa::hex_distance(@n), '==', 92, 'hex_distance class method'); +cmp_ok(Digest::Nilsimsa::hex_distance({}, @n), '==', 92, 'hex_distance obj method'); + --- MANIFEST.orig +++ MANIFEST @@ -7,6 +7,7 @@ README _nilsimsa.c nilsimsa.h t/00_load.t +t/01-hex_distance.t t/a.txt t/b.txt t/d.txt