lair of the dustbunny: Higher Order Perl (Python Style) : Chapter 1


#
# Recursion and Callbacks
#

# sub binary {
#   my ($n) = @_;
#   return $n if $n == 0 || $n == 1;

#   my $k = int($n/2);
#   my $b = $n % 2;

#   my $E = binary($k);

#   return $E . $b;
# }
def binary(n):
    if n in (0,1):
        return str(n)

    k = n / 2
    b = n % 2

    return binary(k) + str(b)


# sub factorial {
#   my ($n) = @_;
#   return factorial($n-1) * $n;
# }
def factorial(n):
    return factorial(n-1) * n


# sub factorial {
#   my ($n) = @_;
#   return 1 if $n == 0;
#   return factorial($n-1) * $n;
# }
def factorial(n):
    if n == 0:
        return 1
    return factorial(n-1) * n

# sub hanoi {
#   my ($n, $start, $end, $extra) = @_;
#   if ($n == 1) {
#     print "Move disk #1 from $start to $end.\n";  # Step 1
#   } else {
#     hanoi($n-1, $start, $extra, $end);            # Step 2
#     print "Move disk #$n from $start to $end.\n"; # Step 3
#     hanoi($n-1, $extra, $end, $start);            # Step 4
#   }
# }
def hanoi(n, start, end, extra):
    if n == 1:
        print "Move disk #1 from %(start)s to %(end)s." % locals()
    else:
        hanoi(n-1, start, extra, end)
        print "Move disk #%(n)s from %(start)s to %(end)s." % locals()
        hanoi(n-1, extra, end, start)
        


# sub hanoi {
#   my ($n, $start, $end, $extra, $move_disk) = @_;
#   if ($n == 1) {
#     $move_disk->(1, $start, $end);
#   } else {
#     hanoi($n-1, $start, $extra, $end, $move_disk);
#     $move_disk->($n, $start, $end);
#     hanoi($n-1, $extra, $end, $start, $move_disk);
#   }
# }
def hanoi(n, start, end, extra, move_disk):
    if n == 1:
        move_disk(1, start, end)
    else:
        hanoi(n-1, start, extra, end, move_disk)
        move_disk(n, start, end)
        hanoi(n-1, extra, end, start, move_disk)
    

# sub print_instruction {
#   my ($disk, $start, $end) = @_;
#   print "Move disk #$disk from $start to $end.\n";
# }
# hanoi(3, 'A', 'C', 'B', \&print_instruction);
def print_instruction(disk, start, end):
    print "Move disk %(disk)s from %(start)s to %(end)s" % locals()

hanoi(3, "A", "C", "B", print_instruction)




# @position = ('', ('A') x 3); # Disks are all initially on peg A

# sub check_move {
#     my $i;
#     my ($disk, $start, $end) = @_;

#     if ($disk < 1 || $disk > $#position) {
#        die "Bad disk number $disk. Should be 1..$#position.\n";
#     }

#     unless ($position[$disk] eq $start) {
#        die "Tried to move disk $disk from $start, but it is on peg
#                                                       $position[$disk].\n";
#     }

#     for $i (1 .. $disk-1) {
#        if ($position[$i] eq $start) {
#            die "Can't move disk $disk from $start because $i is on top of it.\n";
#        } elsif ($position[$i] eq $end) {
#            die "Can't move disk $disk to $end because $i is already there.\n";
#        }
#     }

#     $position[$disk] = $end;
# }

# hanoi(3, 'A', 'C', 'B', \&check_move);
position = [""] + ["A"] * 3

def check_move(disk, start, end):
    if disk < 1 or disk > len(position) - 1:
        sys.exit("Bad disk number %s. Should be 1..%s" % (disk, len(position)-1))

    if position[disk] != start:
        sys.exit("Tried to move disk %s from %s, but it is on peg %s" % (disk, start, position[disk]))

    for i in range(1, disk):
        if position[i] == start:
            sys.exit("Can't move disk %s from %s because %s is on top of it." % (disk, start, i))
        elif position[i] == end:
            sys.exit("Can't move disk %s to %s because %s is already there." % (disk, end, i))

    position[disk] = end
    print position

hanoi(3, "A", "C", "B", check_move)




# sub total_size {
#    my ($top) = @_;
#    my $total = -s $top;

#    return $total if -f $top;
#    unless (opendir DIR, $top) {
#       warn "Couldn’t open directory $top: $!; skipping.\n";
#       return $total;
#    }

#    my $file;
#    while ($file = readdir DIR) {
#      next if $file eq '.' || $file eq '..';
#      $total += total_size("$top/$file");
#    }

#    closedir DIR;
#    return $total;
# }

# NOTE: in python you'd probably prefer
#       os.walk
# ALSO: it would be unnatural to duplicate the 
#       buggy first version of this function so we'll
#       just be satisfied with it working. :) 
def total_size(top):
    total = os.path.getsize(top)

    if os.path.isfile(top):
        return total

    try:
        for filename in os.listdir(top):
            total += total_size(os.path.join(top, filename))
    except OSError:
        print "Couldn't open directory: %s; skipping." % top
        return total
    return total

# sub dir_walk {
#   my ($top, $code) = @_;
#   my $DIR;
#   $code->($top);
#   if (-d $top) {
#     my $file;
#     unless (opendir $DIR, $top) {
#       warn "Couldn’t open directory $top: $!; skipping.\n";
#       return;
#     }
#     while ($file = readdir $DIR) {
#       next if $file eq '.' || $file eq '..';
#       dir_walk("$top/$file", $code);
#     }
#   }
# }

# the traditional python way would be to use os.walk()
def dir_walk(top, code):
    code(top)
    
    if os.path.isdir(top):
        try:
            for filename in os.listdir(top):
                dir_walk(os.path.join(top, filename), code)
        except OSError:
            print "Couldn't open directory: %s; skipping." % top


        
# sub print_dir {
#   print $_[0], "\n";
# }
def print_dir(x):
    print x

#dir_walk('.', sub { printf "%6d %s\n", -s $_[0], $_[0] } );
dir_walk('.', lambda x: sys.stdout.write("%6d %s\n" % (os.path.getsize(x), x)))

#dir_walk('.', sub { print $_[0], "\n" if -l $_[0] && ! -e $_[0] });
dir_walk('.', lambda x: (os.path.islink(x) and os.path.exists(os.readlink(x)) and sys.stdout.write("%s\n" % x)))

# my $TOTAL = 0;
# dir_walk('.', sub { $TOTAL += -s $_[0] });
# print "Total size is $TOTAL.\n";

# can't run += or global w/in lambda
total = 0
def accumulate(x):
    global total
    total += os.path.getsize(x)
dir_walk('.', accumulate)
print "Total size is %s." % total


# sub dir_walk { 
#   my ($top, $filefunc, $dirfunc) = @_;
#   my $DIR;
#   if (-d $top) {
#     my $file;
#     unless (opendir $DIR, $top) {
#       warn "Couldn’t open directory $code: $!; skipping.\n";
#       return;
#     }
#     my @results;
#     while ($file = readdir $DIR) {
#       next if $file eq '.' || $file eq '..';
#       push @results, dir_walk("$top/$file", $filefunc, $dirfunc);
#     }
#     return $dirfunc->($top, @results);
#   } else {
#     return $filefunc->($top);
#   }
# }

# sub file_size { -s $_[0] }

# sub dir_size {
#   my $dir = shift;
#   my $total = -s $dir;
#   my $n;
#   for $n (@_) { $total += $n }
#   return $total;
# }

# $total_size = dir_walk('.', \&file_size, \&dir_size);
def dir_walk(top, filefunc, dirfunc):
    if os.path.isdir(top):
        results = []
        try:
            for filename in os.listdir(top):
                results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))
        except OSError:
            print "Couldn't open directory: %s; skipping." % top
            return
        return dirfunc(top, results)
    else:
        return filefunc(top)

def file_size(x):
    return os.path.getsize(x)

def dir_size(d, accum):
    _dirsize = os.path.getsize(d)
    total = _dirsize + sum(accum)
    return total

# sub dir_size {
#   my $dir = shift;
#   my $total = -s $dir;
#   my $n;
#   for $n (@_) { $total += $n }
#   printf "%6d %s\n", $total, $dir;
#   return $total;
# }
def dir_size(d, accum):
    _dirsize = os.path.getsize(d)
    total = _dirsize + sum(accum)
    print "%6d %ss" % (total, d)
    return total




# sub file {
#   my $file = shift;
#   [short($file), -s $file];
# }
# sub short {
#   my $path = shift;
#   $path = ̃ s{.*/}{};
#   $path;
# }


def _file(x):
    return os.path.basename(x), os.path.getsize(x)

# sub dir {
#   my ($dir, @subdirs) = @_;
#   my %new_hash;
#   for (@subdirs) {
#     my ($subdir_name, $subdir_structure) = @$_;
#     $new_hash{$subdir_name} = $subdir_structure;
#   }
#   return [short($dir), \%new_hash];
# }
def _dir(dir, subdirs):
    new_hash = {}
    for subdir_name, subdir_structure in subdirs:
        new_hash[subdir_name] = subdir_structure
    return os.path.basename(dir), new_hash


# sub print_filename { print $_[0], "\n" }
# dir_walk('.', \&print_filename, \&print_filename);

# we add a result parameter so that the function
# can work as dirfunc
def print_filename(x, result=None):
    print x
dir_walk(".", print_filename, print_filename)


# sub dangles {
#   my $file = shift;
#   print "$file\n" if -l $file && ! -e $file;
# }
# dir_walk('.', \&dangles, sub {});
def dangles(x):
    if os.path.islink(x) and not os.path.exists(os.readlink(x)):
        print x
dir_walk('.', dangles, lambda x,y: ())



# sub dir_walk {
#   my ($top, $filefunc, $dirfunc) = @_;
#   my $DIR;
#   if (-d $top) {
#     my $file;
#     unless (opendir $DIR, $top) {
#       warn "Couldn’t open directory $top: $!; skipping.\n";
#       return;
#     }
#     my @results;
#     while ($file = readdir $DIR) {
#       next if $file eq '.' || $file eq '..';
#       push @results, dir_walk("$top/$file" $filefunc, $dirfunc);
#                                           ,
#     }
#     return $dirfunc ? $dirfunc->($top, @results) : () ;
#   } else {
#     return $filefunc ? $filefunc->($top): () ;
#   }
# }


def dir_walk(top, filefunc=None, dirfunc=None):
    if os.path.isdir(top):
        results = []
        try:
            for filename in os.listdir(top):
                results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))
        except OSError:
            print "Couldn't open directory: %s; skipping." % top
            return
        return dirfunc and dirfunc(top, results)
    else:
        return filefunc and filefunc(top)


#@all_plain_files = dir_walk('.', sub { $_[0] }, sub { shift; return @_ });
all_plain_files = dir_walk(".", lambda x: x, lambda x, y: y)


# { _tag => "p",
#   _content => [ "But I don't ",
#                 { _tag => "font",
#                   _content => [ "want" ],
#                   color => "red",
#                   size => 3,
#                 },
#                 " to go to bed now!",
#                 ],
# }

# so I'm going to punt on this.  I believe I could
# probably get something usefully similar from
# BeautifulSoup or lxml but I don't really
# want to vary from the original perl code *too* much
# so I'll just use a hardcoded dict representation
# and see how far I get with that.
tree =  { "_tag"      : "(document)",
          "_content" : [{"_tag" : "h1",
                         "_content" : [ "What Junior Said Next",
                                     ],                    
                         },
                        "\n\n",
                        {"_tag" : "p",
                         "_content" : [ "But I don't ",
                                        { "_tag" : "font",
                                          "_content" : [ "want" ],
                                          "color" : "red",
                                          "size" : 3,
                                          },
                                        " to go to bed now!",
                                        ],
                         }
                        ]
          }   
# sub untag_html {
#   my ($html) = @_;
#   return $html unless ref $html;   # It’s a plain string
#   my $text = '';
#   for my $item (@{$html->{_content}}) {
#     $text .= untag_html($item);
#   }
#   return $text;
# }
def untag_html(html):
    if type(html) != dict:
        return html

    text = ""
    for item in html["_content"]:
        text += untag_html(item)
    return text

# sub walk_html {
#   my ($html, $textfunc, $elementfunc) = @_;
#   return $textfunc->($html) unless ref $html; # It’s a plain string
#   my @results;
#   for my $item (@{$html->{_content}}) {
#     push @results, walk_html($item, $textfunc, $elementfunc);
#   }
#   return $elementfunc->($html, @results);
# }
# we are going to cheat hear a little and also
# pass around a concatenator function
# since perl's "push" function has some 
# "magic" properties that don't map well to python
def appender(accum, item):
    accum.append(item)
def extender(accum, item):
    accum.extend(item)
def walk_html(html, textfunc, elementfunc, 
              concatenator=appender):
    if type(html) == str:
        return textfunc(html)

    results = []
    for item in html["_content"]:
        concatenator(results, walk_html(item, textfunc, elementfunc, concatenator))

    return elementfunc(html, results)

# walk_html($tree, sub { $_[0] },
#                  sub { shift; join '', @_ });
walk_html(tree, lambda x: x, lambda x,y: "".join(y))

# sub print_if_h1tag {
#   my $element = shift;
#   my $text = join '', @_;
#   print $text if $element->{_tag} eq 'h1';
#   return $text;
# }
def print_if_h1tag(element, results):
    text = "".join(results)
    if element["_tag"] == "h1":
        print text
    return text

#walk_html($tree, sub { $_[0] }, \&print_if_h1tag);
walk_html(tree, lambda x: x, print_if_h1tag)


# @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
#                                  \&promote_if_h1tag);
# sub promote_if_h1tag {
#   my $element = shift;
#   if ($element->{_tag} eq 'h1') {
#     return ['KEEPER', join '', map {$_->[1]} @_];
#   } else {
#     return @_;
#   }
# }
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
                               promote_if_h1tag,
                               extender)
def promote_if_h1tag(element, results):
    if element["_tag"] == "h1":
        return [["KEEPER", "".join([ _x[1] for _x in results])]]
    else:
        return results


# sub extract_headers {
#   my $tree = shift;
#   my @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
#                                       \&promote_if_h1tag);
#   my @keepers = grep { $_->[0] eq 'KEEPER'} @tagged_texts;
#   my @keeper_text = map { $_->[1] } @keepers;
#   my $header_text = join '', @keeper_text;
#   return $header_text;
# }
def extract_headers(tree):
    tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
                                   promote_if_h1tag,
                                   extender)
    keepers = [x for x in tagged_texts if x[0] == "KEEPER"]
    keeper_text = [x[1] for x in keepers]
    header_text = "".join(keeper_text)
    return header_text


# sub extract_headers {
#   my $tree = shift;
#   my @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
#                                       \&promote_if_h1tag);
#   join '', map { $_->[1] } grep { $_->[0] eq 'KEEPER'} @tagged_texts;
# }
def extract_headers(tree):
    tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
                                   promote_if_h1tag,
                                   extender)
    return "".join([x[1] for x in tagged_texts if x[0] == "KEEPER"])


# sub promote_if_h1tag {
#   my $element = shift;
#   if ($element->{_tag} =~ /^h\d+$/) {
#     return ['KEEPER', join '', map {$_->[1]} @_];
#   } else {
#     return @_;
#   }
# }
def promote_if_h1tag(element, results):
    if re.search("^h\d+", element["_tag"]):
        return [["KEEPER", "".join([ _x[1] for _x in results])]]
    else:
        return results


# sub promote_if {
#   my $is_interesting = shift;
#   my $element = shift;
#   if ($is_interesting->($element->{_tag}) {
#     return ['KEEPER', join '', map {$_->[1]} @_];
#   } else {
#     return @_;
#   }
# }
def promote_if(is_interesting, element, results):
    if is_interesting(element["_tag"]):
        return [["KEEPER", "".join([ _x[1] for _x in results])]]
    else:
        return results


# my @tagged_texts = walk_html($tree,
#                               sub { ['maybe', $_[0]] },
#                               sub { promote_if(
#                                        sub { $_[0] eq 'h1'},
#                                        $_[0])
#                               });
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
                               lambda y,z: (promote_if((lambda _y: _y == "h1"), y,z)),
                               extender)


# sub fib {
#   my ($month) = @_;
#   if ($month < 2) { 1 }
#   else {
#     fib($month-1) + fib($month-2);
#   }
# }
def fib(month):
    if month < 2:
        return 1
    else:
        return fib(month-1) + fib(month-2)


# sub find_share {
#   my ($target, $treasures) = @_;
#   return [] if $target == 0;
#   return    if $target < 0 || @$treasures == 0;
#   my ($first, @rest) = @$treasures;
#   my $solution = find_share($target-$first, \@rest);
#   return [$first, @$solution] if $solution;
#   return         find_share($target       , \@rest);
# }
def find_share(target, treasures):
    if target == 0:
        return []
    if target < 0 or len(treasures) == 0:
        return
    first, rest = treasures[0], treasures[1:]
    solution = find_share(target-first, rest)
    if solution != None:
        return [first] + solution
    return find_share(target, rest)


# sub partition {
#   my $total = 0;
#   my $share_2;
#   for my $treasure (@_) {
#     $total += $treasure;
#   }
#   my $share_1 = find_share($total/2, [@_]);
#   return unless defined $share_1;
#   my %in_share_1;
#   for my $treasure (@$share_1) {
#     ++$in_share_1{$treasure};
#   }
#   for my $treasure (@_) {
#     if ($in_share_1{$treasure}) {
#       --$in_share_1{$treasure};
#     } else {
#       push @$share_2, $treasure;
#     }
#   }
#   return ($share_1, $share_2);
# }
# NOTE: would be more pythonic to use sum, sets()
def partition(treasures):
    share_2 = []
    total = 0

    for treasure in treasures:
        total += treasure

    share_1 = find_share(total/2, treasures)
    if not share_1:
        return 

    in_share_1 = {}
    for treasure in share_1:
        in_share_1.setdefault(treasure, 0)
        in_share_1[treasure] += 1

    for treasure in treasures:
        if treasure in in_share_1:
            in_share_1[treasure] -= 1
        else:
            share_2.append(treasure)
    
    return share_1, share_2

# a more pythonic way:
def partition_(treasures):
    share_1 = find_share(sum(treasures)/2, treasures)
    if not share_1:
        return 

    return share_1, list(set(treasures) - set(share_1))
lair of the dustbunny

Wednesday, January 14, 2009

Higher Order Perl (Python Style) : Chapter 1

No comments:

Blog Archive

About Me

Labels