#!/usr/bin/perl use strict; use Getopt::Long; #my $outdir = 'bylsa'; my $tolerance = 5; my $trim = ''; my $justone = ''; #GetOptions("outdir=s", \$outdir, "iterlimit=i", \$iterlimit); GetOptions("tolerance=i", \$tolerance, "trim!", \$trim, "justone!", \$justone); print $justone; #my %boundaries; my %boundaryloc; foreach my $file (@ARGV) { open READ, $file; $file =~ s#/.*## if $trim; my $curline = 1; #my $prevtime = "00:00"; while () { #if (/^(\d\d:\d\d)/) { #$prevtime = $1; #} #elsif (/^#boundary/) { # my $nexttime = ; #chomp $nexttime; #$nexttime =~ s/^(\d\d:\d\d).*/$1/; #push @{$boundaries{$file}}, "$prevtime/$nexttime"; #} # if (/^#boundary/) { $boundaryloc{$file}{$curline} = $curline; } else { $curline++; } } } my %correlation; my %numboundaries; my %matches; foreach my $file (keys %boundaryloc) { $numboundaries{$file} = scalar keys(%{$boundaryloc{$file}}); print "$file has $numboundaries{$file} boundaries\n"; foreach my $offset (keys %{$boundaryloc{$file}}) { OTHERFILE: foreach my $other (keys %boundaryloc) { next if $other eq $file; foreach my $otheroff (($offset-$tolerance)..($offset+$tolerance)) { if ($boundaryloc{$other}{$otheroff}) { #print "$file$offset/$other$otheroff\n"; #$correlation{$file}{$other}++ unless ($justone && scalar @{$matches{$file}{$other}{$offset}}); $correlation{$file}{$other}++ unless ($justone && scalar $matches{$file}{$other}{$otheroff}); $matches{$file}{$other}{$otheroff} = $offset; next OTHERFILE if $justone; #finding multiple boundaries from #somebody else is good -- they #think it's redundant, and you #correlate with all of them } } } } } #my %index; #my %correlation; #foreach (keys %boundaries) { ##print $boundaries{$_}[0] . "\n"; #print "$_ has " . scalar @{$boundaries{$_}} . " boundaries.\n"; #$index{$_} = 0; #foreach my $other (keys %boundaries) { #next if $other eq $_; #if (&boundariesmatch($boundaries{$_}[0], $boundaries{$other}[0])) { #$correlation{$_}{$other}++; #} #} #} #while (!allIndeciesAtBottom(\%boundaries, \%index)) { ##find an earliest one #my $earliesttime = "24:00/24:00"; #my $earliest; #foreach (keys %boundaries) { #if (earlier($boundaries{$_}[$index{$_}], $earliesttime)) { #$earliest = $_; #$earliesttime= $boundaries{$_}[$index{$_}]; #} #} #last if $earliesttime eq "24:00/24:00"; # hack ##advance it ##print "$earliest^".$index{$earliest}++ . " "; #$index{$earliest}++; #check to see if it matches any others #foreach (keys %boundaries) { #next if $_ eq $earliest; #if (boundariesmatch($boundaries{$earliest}[$index{$earliest}], $boundaries{$_}[$index{$_}])) { ##stick the match in both their cells #$correlation{$earliest}{$_}++; #$correlation{$_}{$earliest}++; #} #} #} foreach my $file (sort keys %correlation) { print "*** Correlation for $file:\n"; foreach my $other (keys %{$correlation{$file}}) { #print "$correlation{$file}{$other}\t$other\n"; printf "$correlation{$file}{$other}:%.3f\t$other\n", $correlation{$file}{$other} / $numboundaries{$other}; } } sub allIndeciesAtBottom { my $boundhash = shift; my $indexhash = shift; my $atbottom = 1; foreach (keys %$boundhash) { $atbottom = 0 if ($indexhash->{$_} < $#{$boundhash->{$_}}); #print $indexhash->{$_} ."<". $#{$boundhash->{$_}} . "?"; #print $indexhash->{$_} < $#{$boundhash->{$_}}; #print "\n"; } return $atbottom; } sub boundariesmatch { my @b1 = split m#/#, shift; my @b2 = split m#/#, shift; if ($b1[0] eq $b2[0] || $b1[0] eq $b2[1] || $b1[1] eq $b2[1] || $b1[1] eq $b2[0]) { return 1; } else { return 0; } } sub earlier { return 0 if $_[0] eq ''; #print "$_[0]*$_[1]_" . ($_[0] lt $_[1]); return $_[0] lt $_[1]; #my @time1 = split m#/#, shift; #my @time2 = split m#/#, shift; # #if ($time1[0] eq $time2[0]) { #if ($time1[1] eq $time2[1]) { #return 0; #} #if ($time1[1] le }