## Wednesday, January 14, 2009

### Higher Order Perl (Python Style) : Chapter 1

`## Recursion and Callbacks## sub binary {#   my (\$n) = @_;#   return \$n if \$n == 0 || \$n == 1;#   my \$k = int(\$n/2);#   my \$b = \$n % 2;#   my \$E = binary(\$k);#   return \$E . \$b;# }def binary(n):    if n in (0,1):        return str(n)    k = n / 2    b = n % 2    return binary(k) + str(b)# sub factorial {#   my (\$n) = @_;#   return factorial(\$n-1) * \$n;# }def factorial(n):    return factorial(n-1) * n# sub factorial {#   my (\$n) = @_;#   return 1 if \$n == 0;#   return factorial(\$n-1) * \$n;# }def factorial(n):    if n == 0:        return 1    return factorial(n-1) * n# sub hanoi {#   my (\$n, \$start, \$end, \$extra) = @_;#   if (\$n == 1) {#     print "Move disk #1 from \$start to \$end.\n";  # Step 1#   } else {#     hanoi(\$n-1, \$start, \$extra, \$end);            # Step 2#     print "Move disk #\$n from \$start to \$end.\n"; # Step 3#     hanoi(\$n-1, \$extra, \$end, \$start);            # Step 4#   }# }def hanoi(n, start, end, extra):    if n == 1:        print "Move disk #1 from %(start)s to %(end)s." % locals()    else:        hanoi(n-1, start, extra, end)        print "Move disk #%(n)s from %(start)s to %(end)s." % locals()        hanoi(n-1, extra, end, start)        # sub hanoi {#   my (\$n, \$start, \$end, \$extra, \$move_disk) = @_;#   if (\$n == 1) {#     \$move_disk->(1, \$start, \$end);#   } else {#     hanoi(\$n-1, \$start, \$extra, \$end, \$move_disk);#     \$move_disk->(\$n, \$start, \$end);#     hanoi(\$n-1, \$extra, \$end, \$start, \$move_disk);#   }# }def hanoi(n, start, end, extra, move_disk):    if n == 1:        move_disk(1, start, end)    else:        hanoi(n-1, start, extra, end, move_disk)        move_disk(n, start, end)        hanoi(n-1, extra, end, start, move_disk)    # sub print_instruction {#   my (\$disk, \$start, \$end) = @_;#   print "Move disk #\$disk from \$start to \$end.\n";# }# hanoi(3, 'A', 'C', 'B', \&print_instruction);def print_instruction(disk, start, end):    print "Move disk %(disk)s from %(start)s to %(end)s" % locals()hanoi(3, "A", "C", "B", print_instruction)# @position = ('', ('A') x 3); # Disks are all initially on peg A# sub check_move {#     my \$i;#     my (\$disk, \$start, \$end) = @_;#     if (\$disk < 1 || \$disk > \$#position) {#        die "Bad disk number \$disk. Should be 1..\$#position.\n";#     }#     unless (\$position[\$disk] eq \$start) {#        die "Tried to move disk \$disk from \$start, but it is on peg#                                                       \$position[\$disk].\n";#     }#     for \$i (1 .. \$disk-1) {#        if (\$position[\$i] eq \$start) {#            die "Can't move disk \$disk from \$start because \$i is on top of it.\n";#        } elsif (\$position[\$i] eq \$end) {#            die "Can't move disk \$disk to \$end because \$i is already there.\n";#        }#     }#     \$position[\$disk] = \$end;# }# hanoi(3, 'A', 'C', 'B', \&check_move);position = [""] + ["A"] * 3def check_move(disk, start, end):    if disk < 1 or disk > len(position) - 1:        sys.exit("Bad disk number %s. Should be 1..%s" % (disk, len(position)-1))    if position[disk] != start:        sys.exit("Tried to move disk %s from %s, but it is on peg %s" % (disk, start, position[disk]))    for i in range(1, disk):        if position[i] == start:            sys.exit("Can't move disk %s from %s because %s is on top of it." % (disk, start, i))        elif position[i] == end:            sys.exit("Can't move disk %s to %s because %s is already there." % (disk, end, i))    position[disk] = end    print positionhanoi(3, "A", "C", "B", check_move)# sub total_size {#    my (\$top) = @_;#    my \$total = -s \$top;#    return \$total if -f \$top;#    unless (opendir DIR, \$top) {#       warn "Couldn’t open directory \$top: \$!; skipping.\n";#       return \$total;#    }#    my \$file;#    while (\$file = readdir DIR) {#      next if \$file eq '.' || \$file eq '..';#      \$total += total_size("\$top/\$file");#    }#    closedir DIR;#    return \$total;# }# NOTE: in python you'd probably prefer#       os.walk# ALSO: it would be unnatural to duplicate the #       buggy first version of this function so we'll#       just be satisfied with it working. :) def total_size(top):    total = os.path.getsize(top)    if os.path.isfile(top):        return total    try:        for filename in os.listdir(top):            total += total_size(os.path.join(top, filename))    except OSError:        print "Couldn't open directory: %s; skipping." % top        return total    return total# sub dir_walk {#   my (\$top, \$code) = @_;#   my \$DIR;#   \$code->(\$top);#   if (-d \$top) {#     my \$file;#     unless (opendir \$DIR, \$top) {#       warn "Couldn’t open directory \$top: \$!; skipping.\n";#       return;#     }#     while (\$file = readdir \$DIR) {#       next if \$file eq '.' || \$file eq '..';#       dir_walk("\$top/\$file", \$code);#     }#   }# }# the traditional python way would be to use os.walk()def dir_walk(top, code):    code(top)        if os.path.isdir(top):        try:            for filename in os.listdir(top):                dir_walk(os.path.join(top, filename), code)        except OSError:            print "Couldn't open directory: %s; skipping." % top        # sub print_dir {#   print \$_[0], "\n";# }def print_dir(x):    print x#dir_walk('.', sub { printf "%6d %s\n", -s \$_[0], \$_[0] } );dir_walk('.', lambda x: sys.stdout.write("%6d %s\n" % (os.path.getsize(x), x)))#dir_walk('.', sub { print \$_[0], "\n" if -l \$_[0] && ! -e \$_[0] });dir_walk('.', lambda x: (os.path.islink(x) and os.path.exists(os.readlink(x)) and sys.stdout.write("%s\n" % x)))# my \$TOTAL = 0;# dir_walk('.', sub { \$TOTAL += -s \$_[0] });# print "Total size is \$TOTAL.\n";# can't run += or global w/in lambdatotal = 0def accumulate(x):    global total    total += os.path.getsize(x)dir_walk('.', accumulate)print "Total size is %s." % total# sub dir_walk { #   my (\$top, \$filefunc, \$dirfunc) = @_;#   my \$DIR;#   if (-d \$top) {#     my \$file;#     unless (opendir \$DIR, \$top) {#       warn "Couldn’t open directory \$code: \$!; skipping.\n";#       return;#     }#     my @results;#     while (\$file = readdir \$DIR) {#       next if \$file eq '.' || \$file eq '..';#       push @results, dir_walk("\$top/\$file", \$filefunc, \$dirfunc);#     }#     return \$dirfunc->(\$top, @results);#   } else {#     return \$filefunc->(\$top);#   }# }# sub file_size { -s \$_[0] }# sub dir_size {#   my \$dir = shift;#   my \$total = -s \$dir;#   my \$n;#   for \$n (@_) { \$total += \$n }#   return \$total;# }# \$total_size = dir_walk('.', \&file_size, \&dir_size);def dir_walk(top, filefunc, dirfunc):    if os.path.isdir(top):        results = []        try:            for filename in os.listdir(top):                results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))        except OSError:            print "Couldn't open directory: %s; skipping." % top            return        return dirfunc(top, results)    else:        return filefunc(top)def file_size(x):    return os.path.getsize(x)def dir_size(d, accum):    _dirsize = os.path.getsize(d)    total = _dirsize + sum(accum)    return total# sub dir_size {#   my \$dir = shift;#   my \$total = -s \$dir;#   my \$n;#   for \$n (@_) { \$total += \$n }#   printf "%6d %s\n", \$total, \$dir;#   return \$total;# }def dir_size(d, accum):    _dirsize = os.path.getsize(d)    total = _dirsize + sum(accum)    print "%6d %ss" % (total, d)    return total# sub file {#   my \$file = shift;#   [short(\$file), -s \$file];# }# sub short {#   my \$path = shift;#   \$path = ̃ s{.*/}{};#   \$path;# }def _file(x):    return os.path.basename(x), os.path.getsize(x)# sub dir {#   my (\$dir, @subdirs) = @_;#   my %new_hash;#   for (@subdirs) {#     my (\$subdir_name, \$subdir_structure) = @\$_;#     \$new_hash{\$subdir_name} = \$subdir_structure;#   }#   return [short(\$dir), \%new_hash];# }def _dir(dir, subdirs):    new_hash = {}    for subdir_name, subdir_structure in subdirs:        new_hash[subdir_name] = subdir_structure    return os.path.basename(dir), new_hash# sub print_filename { print \$_[0], "\n" }# dir_walk('.', \&print_filename, \&print_filename);# we add a result parameter so that the function# can work as dirfuncdef print_filename(x, result=None):    print xdir_walk(".", print_filename, print_filename)# sub dangles {#   my \$file = shift;#   print "\$file\n" if -l \$file && ! -e \$file;# }# dir_walk('.', \&dangles, sub {});def dangles(x):    if os.path.islink(x) and not os.path.exists(os.readlink(x)):        print xdir_walk('.', dangles, lambda x,y: ())# sub dir_walk {#   my (\$top, \$filefunc, \$dirfunc) = @_;#   my \$DIR;#   if (-d \$top) {#     my \$file;#     unless (opendir \$DIR, \$top) {#       warn "Couldn’t open directory \$top: \$!; skipping.\n";#       return;#     }#     my @results;#     while (\$file = readdir \$DIR) {#       next if \$file eq '.' || \$file eq '..';#       push @results, dir_walk("\$top/\$file" \$filefunc, \$dirfunc);#                                           ,#     }#     return \$dirfunc ? \$dirfunc->(\$top, @results) : () ;#   } else {#     return \$filefunc ? \$filefunc->(\$top): () ;#   }# }def dir_walk(top, filefunc=None, dirfunc=None):    if os.path.isdir(top):        results = []        try:            for filename in os.listdir(top):                results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))        except OSError:            print "Couldn't open directory: %s; skipping." % top            return        return dirfunc and dirfunc(top, results)    else:        return filefunc and filefunc(top)#@all_plain_files = dir_walk('.', sub { \$_[0] }, sub { shift; return @_ });all_plain_files = dir_walk(".", lambda x: x, lambda x, y: y)# { _tag => "p",#   _content => [ "But I don't ",#                 { _tag => "font",#                   _content => [ "want" ],#                   color => "red",#                   size => 3,#                 },#                 " to go to bed now!",#                 ],# }# so I'm going to punt on this.  I believe I could# probably get something usefully similar from# BeautifulSoup or lxml but I don't really# want to vary from the original perl code *too* much# so I'll just use a hardcoded dict representation# and see how far I get with that.tree =  { "_tag"      : "(document)",          "_content" : [{"_tag" : "h1",                         "_content" : [ "What Junior Said Next",                                     ],                                             },                        "\n\n",                        {"_tag" : "p",                         "_content" : [ "But I don't ",                                        { "_tag" : "font",                                          "_content" : [ "want" ],                                          "color" : "red",                                          "size" : 3,                                          },                                        " to go to bed now!",                                        ],                         }                        ]          }   # sub untag_html {#   my (\$html) = @_;#   return \$html unless ref \$html;   # It’s a plain string#   my \$text = '';#   for my \$item (@{\$html->{_content}}) {#     \$text .= untag_html(\$item);#   }#   return \$text;# }def untag_html(html):    if type(html) != dict:        return html    text = ""    for item in html["_content"]:        text += untag_html(item)    return text# sub walk_html {#   my (\$html, \$textfunc, \$elementfunc) = @_;#   return \$textfunc->(\$html) unless ref \$html; # It’s a plain string#   my @results;#   for my \$item (@{\$html->{_content}}) {#     push @results, walk_html(\$item, \$textfunc, \$elementfunc);#   }#   return \$elementfunc->(\$html, @results);# }# we are going to cheat hear a little and also# pass around a concatenator function# since perl's "push" function has some # "magic" properties that don't map well to pythondef appender(accum, item):    accum.append(item)def extender(accum, item):    accum.extend(item)def walk_html(html, textfunc, elementfunc,               concatenator=appender):    if type(html) == str:        return textfunc(html)    results = []    for item in html["_content"]:        concatenator(results, walk_html(item, textfunc, elementfunc, concatenator))    return elementfunc(html, results)# walk_html(\$tree, sub { \$_[0] },#                  sub { shift; join '', @_ });walk_html(tree, lambda x: x, lambda x,y: "".join(y))# sub print_if_h1tag {#   my \$element = shift;#   my \$text = join '', @_;#   print \$text if \$element->{_tag} eq 'h1';#   return \$text;# }def print_if_h1tag(element, results):    text = "".join(results)    if element["_tag"] == "h1":        print text    return text#walk_html(\$tree, sub { \$_[0] }, \&print_if_h1tag);walk_html(tree, lambda x: x, print_if_h1tag)# @tagged_texts = walk_html(\$tree, sub { ['MAYBE', \$_[0]] },#                                  \&promote_if_h1tag);# sub promote_if_h1tag {#   my \$element = shift;#   if (\$element->{_tag} eq 'h1') {#     return ['KEEPER', join '', map {\$_->[1]} @_];#   } else {#     return @_;#   }# }tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],                               promote_if_h1tag,                               extender)def promote_if_h1tag(element, results):    if element["_tag"] == "h1":        return [["KEEPER", "".join([ _x[1] for _x in results])]]    else:        return results# sub extract_headers {#   my \$tree = shift;#   my @tagged_texts = walk_html(\$tree, sub { ['MAYBE', \$_[0]] },#                                       \&promote_if_h1tag);#   my @keepers = grep { \$_->[0] eq 'KEEPER'} @tagged_texts;#   my @keeper_text = map { \$_->[1] } @keepers;#   my \$header_text = join '', @keeper_text;#   return \$header_text;# }def extract_headers(tree):    tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],                                   promote_if_h1tag,                                   extender)    keepers = [x for x in tagged_texts if x[0] == "KEEPER"]    keeper_text = [x[1] for x in keepers]    header_text = "".join(keeper_text)    return header_text# sub extract_headers {#   my \$tree = shift;#   my @tagged_texts = walk_html(\$tree, sub { ['MAYBE', \$_[0]] },#                                       \&promote_if_h1tag);#   join '', map { \$_->[1] } grep { \$_->[0] eq 'KEEPER'} @tagged_texts;# }def extract_headers(tree):    tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],                                   promote_if_h1tag,                                   extender)    return "".join([x[1] for x in tagged_texts if x[0] == "KEEPER"])# sub promote_if_h1tag {#   my \$element = shift;#   if (\$element->{_tag} =~ /^h\d+\$/) {#     return ['KEEPER', join '', map {\$_->[1]} @_];#   } else {#     return @_;#   }# }def promote_if_h1tag(element, results):    if re.search("^h\d+", element["_tag"]):        return [["KEEPER", "".join([ _x[1] for _x in results])]]    else:        return results# sub promote_if {#   my \$is_interesting = shift;#   my \$element = shift;#   if (\$is_interesting->(\$element->{_tag}) {#     return ['KEEPER', join '', map {\$_->[1]} @_];#   } else {#     return @_;#   }# }def promote_if(is_interesting, element, results):    if is_interesting(element["_tag"]):        return [["KEEPER", "".join([ _x[1] for _x in results])]]    else:        return results# my @tagged_texts = walk_html(\$tree,#                               sub { ['maybe', \$_[0]] },#                               sub { promote_if(#                                        sub { \$_[0] eq 'h1'},#                                        \$_[0])#                               });tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],                               lambda y,z: (promote_if((lambda _y: _y == "h1"), y,z)),                               extender)# sub fib {#   my (\$month) = @_;#   if (\$month < 2) { 1 }#   else {#     fib(\$month-1) + fib(\$month-2);#   }# }def fib(month):    if month < 2:        return 1    else:        return fib(month-1) + fib(month-2)# sub find_share {#   my (\$target, \$treasures) = @_;#   return [] if \$target == 0;#   return    if \$target < 0 || @\$treasures == 0;#   my (\$first, @rest) = @\$treasures;#   my \$solution = find_share(\$target-\$first, \@rest);#   return [\$first, @\$solution] if \$solution;#   return         find_share(\$target       , \@rest);# }def find_share(target, treasures):    if target == 0:        return []    if target < 0 or len(treasures) == 0:        return    first, rest = treasures[0], treasures[1:]    solution = find_share(target-first, rest)    if solution != None:        return [first] + solution    return find_share(target, rest)# sub partition {#   my \$total = 0;#   my \$share_2;#   for my \$treasure (@_) {#     \$total += \$treasure;#   }#   my \$share_1 = find_share(\$total/2, [@_]);#   return unless defined \$share_1;#   my %in_share_1;#   for my \$treasure (@\$share_1) {#     ++\$in_share_1{\$treasure};#   }#   for my \$treasure (@_) {#     if (\$in_share_1{\$treasure}) {#       --\$in_share_1{\$treasure};#     } else {#       push @\$share_2, \$treasure;#     }#   }#   return (\$share_1, \$share_2);# }# NOTE: would be more pythonic to use sum, sets()def partition(treasures):    share_2 = []    total = 0    for treasure in treasures:        total += treasure    share_1 = find_share(total/2, treasures)    if not share_1:        return     in_share_1 = {}    for treasure in share_1:        in_share_1.setdefault(treasure, 0)        in_share_1[treasure] += 1    for treasure in treasures:        if treasure in in_share_1:            in_share_1[treasure] -= 1        else:            share_2.append(treasure)        return share_1, share_2# a more pythonic way:def partition_(treasures):    share_1 = find_share(sum(treasures)/2, treasures)    if not share_1:        return     return share_1, list(set(treasures) - set(share_1))`