package Huffman; use strict; use Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); use Data::Dumper; $VERSION = 1.00; @ISA = qw(Exporter); @EXPORT = (); @EXPORT_OK = qw(&freq &build_tree &print_tree &build_encodings &encode_str &decode_str); %EXPORT_TAGS = (All => [qw(&freq &build_tree &print_tree &build_encodings &encode_str &decode_str)]); # # counts frequency of each character # returns hash: {char => count} # sub freq { my $string = shift @_; my %count = (); my @array = split(//,$string); map { $count{$_}++ } @array; return %count; } # # build binary tree based on frequency counts # returns hash based binary tree of anonymous hashes # sub build_tree { my $string = shift @_; my %count = freq($string); my $tree; # tree that gets returned my $sum = 0; # sort chars by frequency my @sorted = sort { $count{$b} <=> $count{$a} } keys %count; # stores sub trees as they are built my @trees = (); # initialize @trees - each char is a sub tree foreach my $char (@sorted) { push(@trees,{ CHAR=>$char,FREQ=>$count{$char},LEFT=>undef,RIGHT=>undef }); } @sorted = undef; # iteratively build up the final tree by joining the lowest 2 counts on each pass while (@trees != 1){ my $node1 = pop(@trees); my $node2 = pop(@trees); my $freq_sum = $node1->{FREQ} + $node2->{FREQ}; # create new node out of the 2 lowest push(@trees,{ CHAR=>undef,FREQ=>$freq_sum,LEFT=>$node1,RIGHT=>$node2 }); # sort @trees based on newest FREQ value @trees = sort { $b->{FREQ} <=> $a->{FREQ} } @trees; }; $tree = pop(@trees); return $tree; } # # print tree given a tree as returned from build_tree # sub print_tree { print Dumper(@_); } # # Traverse - visits each leaf starting with the left most leaf # Whenever a leaf is reached, the path (a string of 1's and 0's) # sub traverse { my $tree = shift @_; my $bit = shift @_; my @paths = (); if (!$tree) { return; } else { push(@paths,traverse($tree->{LEFT},$bit."0")); push(@paths,traverse($tree->{RIGHT},$bit."1")); if (defined($tree->{CHAR})) { return {CHAR=>$tree->{CHAR},FREQ=>$tree->{FREQ},ENCODING=>$bit}; } } return @paths; } # # build binary encodings, returns as hash # returns an array of anonymous hashes that contains: # CHAR, FREQ, and ENCODING # sub build_encodings { my $string = shift @_; my $tree = build_tree($string); my @paths = traverse($tree,''); return @paths; } # # encode text # sub encode_str { my $string = shift @_; my @encodings = build_encodings($string); foreach my $item (@encodings) { $string =~ s/[$item->{CHAR}]/$item->{ENCODING}/g; } return $string; } # # decodes 1's and 0's - requires an encoding table and an encoded string # sub decode_str { my $encoded = shift @_; my @encodings = @_; my $decoded = ''; # puts encoded binary string into array my @bits = split(//,$encoded); my $char = ''; # cycle over array of encoded bits, and look up in table foreach my $bit (@bits) { $char .= $bit; foreach my $item (@encodings) { if ($item->{ENCODING} eq $char) { $decoded .= "$item->{CHAR}"; $char = ''; } } } return $decoded; }