Wednesday, January 14, 2009

Higher Order Perl (Python Style) : Chapter 1


#
# Recursion and Callbacks
#

# sub binary {
# my ($n) = @_;
# return $n if $n == 0 || $n == 1;

# my $k = int($n/2);
# my $b = $n % 2;

# my $E = binary($k);

# return $E . $b;
# }
def binary(n):
if n in (0,1):
return str(n)

k = n / 2
b = n % 2

return binary(k) + str(b)


# sub factorial {
# my ($n) = @_;
# return factorial($n-1) * $n;
# }
def factorial(n):
return factorial(n-1) * n


# sub factorial {
# my ($n) = @_;
# return 1 if $n == 0;
# return factorial($n-1) * $n;
# }
def factorial(n):
if n == 0:
return 1
return factorial(n-1) * n

# sub hanoi {
# my ($n, $start, $end, $extra) = @_;
# if ($n == 1) {
# print "Move disk #1 from $start to $end.\n"; # Step 1
# } else {
# hanoi($n-1, $start, $extra, $end); # Step 2
# print "Move disk #$n from $start to $end.\n"; # Step 3
# hanoi($n-1, $extra, $end, $start); # Step 4
# }
# }
def hanoi(n, start, end, extra):
if n == 1:
print "Move disk #1 from %(start)s to %(end)s." % locals()
else:
hanoi(n-1, start, extra, end)
print "Move disk #%(n)s from %(start)s to %(end)s." % locals()
hanoi(n-1, extra, end, start)



# sub hanoi {
# my ($n, $start, $end, $extra, $move_disk) = @_;
# if ($n == 1) {
# $move_disk->(1, $start, $end);
# } else {
# hanoi($n-1, $start, $extra, $end, $move_disk);
# $move_disk->($n, $start, $end);
# hanoi($n-1, $extra, $end, $start, $move_disk);
# }
# }
def hanoi(n, start, end, extra, move_disk):
if n == 1:
move_disk(1, start, end)
else:
hanoi(n-1, start, extra, end, move_disk)
move_disk(n, start, end)
hanoi(n-1, extra, end, start, move_disk)


# sub print_instruction {
# my ($disk, $start, $end) = @_;
# print "Move disk #$disk from $start to $end.\n";
# }
# hanoi(3, 'A', 'C', 'B', \&print_instruction);
def print_instruction(disk, start, end):
print "Move disk %(disk)s from %(start)s to %(end)s" % locals()

hanoi(3, "A", "C", "B", print_instruction)




# @position = ('', ('A') x 3); # Disks are all initially on peg A

# sub check_move {
# my $i;
# my ($disk, $start, $end) = @_;

# if ($disk < 1 || $disk > $#position) {
# die "Bad disk number $disk. Should be 1..$#position.\n";
# }

# unless ($position[$disk] eq $start) {
# die "Tried to move disk $disk from $start, but it is on peg
# $position[$disk].\n";
# }

# for $i (1 .. $disk-1) {
# if ($position[$i] eq $start) {
# die "Can't move disk $disk from $start because $i is on top of it.\n";
# } elsif ($position[$i] eq $end) {
# die "Can't move disk $disk to $end because $i is already there.\n";
# }
# }

# $position[$disk] = $end;
# }

# hanoi(3, 'A', 'C', 'B', \&check_move);
position = [""] + ["A"] * 3

def check_move(disk, start, end):
if disk < 1 or disk > len(position) - 1:
sys.exit("Bad disk number %s. Should be 1..%s" % (disk, len(position)-1))

if position[disk] != start:
sys.exit("Tried to move disk %s from %s, but it is on peg %s" % (disk, start, position[disk]))

for i in range(1, disk):
if position[i] == start:
sys.exit("Can't move disk %s from %s because %s is on top of it." % (disk, start, i))
elif position[i] == end:
sys.exit("Can't move disk %s to %s because %s is already there." % (disk, end, i))

position[disk] = end
print position

hanoi(3, "A", "C", "B", check_move)




# sub total_size {
# my ($top) = @_;
# my $total = -s $top;

# return $total if -f $top;
# unless (opendir DIR, $top) {
# warn "Couldn’t open directory $top: $!; skipping.\n";
# return $total;
# }

# my $file;
# while ($file = readdir DIR) {
# next if $file eq '.' || $file eq '..';
# $total += total_size("$top/$file");
# }

# closedir DIR;
# return $total;
# }

# NOTE: in python you'd probably prefer
# os.walk
# ALSO: it would be unnatural to duplicate the
# buggy first version of this function so we'll
# just be satisfied with it working. :)
def total_size(top):
total = os.path.getsize(top)

if os.path.isfile(top):
return total

try:
for filename in os.listdir(top):
total += total_size(os.path.join(top, filename))
except OSError:
print "Couldn't open directory: %s; skipping." % top
return total
return total

# sub dir_walk {
# my ($top, $code) = @_;
# my $DIR;
# $code->($top);
# if (-d $top) {
# my $file;
# unless (opendir $DIR, $top) {
# warn "Couldn’t open directory $top: $!; skipping.\n";
# return;
# }
# while ($file = readdir $DIR) {
# next if $file eq '.' || $file eq '..';
# dir_walk("$top/$file", $code);
# }
# }
# }

# the traditional python way would be to use os.walk()
def dir_walk(top, code):
code(top)

if os.path.isdir(top):
try:
for filename in os.listdir(top):
dir_walk(os.path.join(top, filename), code)
except OSError:
print "Couldn't open directory: %s; skipping." % top



# sub print_dir {
# print $_[0], "\n";
# }
def print_dir(x):
print x

#dir_walk('.', sub { printf "%6d %s\n", -s $_[0], $_[0] } );
dir_walk('.', lambda x: sys.stdout.write("%6d %s\n" % (os.path.getsize(x), x)))

#dir_walk('.', sub { print $_[0], "\n" if -l $_[0] && ! -e $_[0] });
dir_walk('.', lambda x: (os.path.islink(x) and os.path.exists(os.readlink(x)) and sys.stdout.write("%s\n" % x)))

# my $TOTAL = 0;
# dir_walk('.', sub { $TOTAL += -s $_[0] });
# print "Total size is $TOTAL.\n";

# can't run += or global w/in lambda
total = 0
def accumulate(x):
global total
total += os.path.getsize(x)
dir_walk('.', accumulate)
print "Total size is %s." % total


# sub dir_walk {
# my ($top, $filefunc, $dirfunc) = @_;
# my $DIR;
# if (-d $top) {
# my $file;
# unless (opendir $DIR, $top) {
# warn "Couldn’t open directory $code: $!; skipping.\n";
# return;
# }
# my @results;
# while ($file = readdir $DIR) {
# next if $file eq '.' || $file eq '..';
# push @results, dir_walk("$top/$file", $filefunc, $dirfunc);
# }
# return $dirfunc->($top, @results);
# } else {
# return $filefunc->($top);
# }
# }

# sub file_size { -s $_[0] }

# sub dir_size {
# my $dir = shift;
# my $total = -s $dir;
# my $n;
# for $n (@_) { $total += $n }
# return $total;
# }

# $total_size = dir_walk('.', \&file_size, \&dir_size);
def dir_walk(top, filefunc, dirfunc):
if os.path.isdir(top):
results = []
try:
for filename in os.listdir(top):
results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))
except OSError:
print "Couldn't open directory: %s; skipping." % top
return
return dirfunc(top, results)
else:
return filefunc(top)

def file_size(x):
return os.path.getsize(x)

def dir_size(d, accum):
_dirsize = os.path.getsize(d)
total = _dirsize + sum(accum)
return total

# sub dir_size {
# my $dir = shift;
# my $total = -s $dir;
# my $n;
# for $n (@_) { $total += $n }
# printf "%6d %s\n", $total, $dir;
# return $total;
# }
def dir_size(d, accum):
_dirsize = os.path.getsize(d)
total = _dirsize + sum(accum)
print "%6d %ss" % (total, d)
return total




# sub file {
# my $file = shift;
# [short($file), -s $file];
# }
# sub short {
# my $path = shift;
# $path = ̃ s{.*/}{};
# $path;
# }


def _file(x):
return os.path.basename(x), os.path.getsize(x)

# sub dir {
# my ($dir, @subdirs) = @_;
# my %new_hash;
# for (@subdirs) {
# my ($subdir_name, $subdir_structure) = @$_;
# $new_hash{$subdir_name} = $subdir_structure;
# }
# return [short($dir), \%new_hash];
# }
def _dir(dir, subdirs):
new_hash = {}
for subdir_name, subdir_structure in subdirs:
new_hash[subdir_name] = subdir_structure
return os.path.basename(dir), new_hash


# sub print_filename { print $_[0], "\n" }
# dir_walk('.', \&print_filename, \&print_filename);

# we add a result parameter so that the function
# can work as dirfunc
def print_filename(x, result=None):
print x
dir_walk(".", print_filename, print_filename)


# sub dangles {
# my $file = shift;
# print "$file\n" if -l $file && ! -e $file;
# }
# dir_walk('.', \&dangles, sub {});
def dangles(x):
if os.path.islink(x) and not os.path.exists(os.readlink(x)):
print x
dir_walk('.', dangles, lambda x,y: ())



# sub dir_walk {
# my ($top, $filefunc, $dirfunc) = @_;
# my $DIR;
# if (-d $top) {
# my $file;
# unless (opendir $DIR, $top) {
# warn "Couldn’t open directory $top: $!; skipping.\n";
# return;
# }
# my @results;
# while ($file = readdir $DIR) {
# next if $file eq '.' || $file eq '..';
# push @results, dir_walk("$top/$file" $filefunc, $dirfunc);
# ,
# }
# return $dirfunc ? $dirfunc->($top, @results) : () ;
# } else {
# return $filefunc ? $filefunc->($top): () ;
# }
# }


def dir_walk(top, filefunc=None, dirfunc=None):
if os.path.isdir(top):
results = []
try:
for filename in os.listdir(top):
results.append(dir_walk(os.path.join(top, filename), filefunc, dirfunc))
except OSError:
print "Couldn't open directory: %s; skipping." % top
return
return dirfunc and dirfunc(top, results)
else:
return filefunc and filefunc(top)


#@all_plain_files = dir_walk('.', sub { $_[0] }, sub { shift; return @_ });
all_plain_files = dir_walk(".", lambda x: x, lambda x, y: y)


# { _tag => "p",
# _content => [ "But I don't ",
# { _tag => "font",
# _content => [ "want" ],
# color => "red",
# size => 3,
# },
# " to go to bed now!",
# ],
# }

# so I'm going to punt on this. I believe I could
# probably get something usefully similar from
# BeautifulSoup or lxml but I don't really
# want to vary from the original perl code *too* much
# so I'll just use a hardcoded dict representation
# and see how far I get with that.
tree = { "_tag" : "(document)",
"_content" : [{"_tag" : "h1",
"_content" : [ "What Junior Said Next",
],
},
"\n\n",
{"_tag" : "p",
"_content" : [ "But I don't ",
{ "_tag" : "font",
"_content" : [ "want" ],
"color" : "red",
"size" : 3,
},
" to go to bed now!",
],
}
]
}
# sub untag_html {
# my ($html) = @_;
# return $html unless ref $html; # It’s a plain string
# my $text = '';
# for my $item (@{$html->{_content}}) {
# $text .= untag_html($item);
# }
# return $text;
# }
def untag_html(html):
if type(html) != dict:
return html

text = ""
for item in html["_content"]:
text += untag_html(item)
return text

# sub walk_html {
# my ($html, $textfunc, $elementfunc) = @_;
# return $textfunc->($html) unless ref $html; # It’s a plain string
# my @results;
# for my $item (@{$html->{_content}}) {
# push @results, walk_html($item, $textfunc, $elementfunc);
# }
# return $elementfunc->($html, @results);
# }
# we are going to cheat hear a little and also
# pass around a concatenator function
# since perl's "push" function has some
# "magic" properties that don't map well to python
def appender(accum, item):
accum.append(item)
def extender(accum, item):
accum.extend(item)
def walk_html(html, textfunc, elementfunc,
concatenator=appender):
if type(html) == str:
return textfunc(html)

results = []
for item in html["_content"]:
concatenator(results, walk_html(item, textfunc, elementfunc, concatenator))

return elementfunc(html, results)

# walk_html($tree, sub { $_[0] },
# sub { shift; join '', @_ });
walk_html(tree, lambda x: x, lambda x,y: "".join(y))

# sub print_if_h1tag {
# my $element = shift;
# my $text = join '', @_;
# print $text if $element->{_tag} eq 'h1';
# return $text;
# }
def print_if_h1tag(element, results):
text = "".join(results)
if element["_tag"] == "h1":
print text
return text

#walk_html($tree, sub { $_[0] }, \&print_if_h1tag);
walk_html(tree, lambda x: x, print_if_h1tag)


# @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
# \&promote_if_h1tag);
# sub promote_if_h1tag {
# my $element = shift;
# if ($element->{_tag} eq 'h1') {
# return ['KEEPER', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if_h1tag,
extender)
def promote_if_h1tag(element, results):
if element["_tag"] == "h1":
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results


# sub extract_headers {
# my $tree = shift;
# my @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
# \&promote_if_h1tag);
# my @keepers = grep { $_->[0] eq 'KEEPER'} @tagged_texts;
# my @keeper_text = map { $_->[1] } @keepers;
# my $header_text = join '', @keeper_text;
# return $header_text;
# }
def extract_headers(tree):
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if_h1tag,
extender)
keepers = [x for x in tagged_texts if x[0] == "KEEPER"]
keeper_text = [x[1] for x in keepers]
header_text = "".join(keeper_text)
return header_text


# sub extract_headers {
# my $tree = shift;
# my @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
# \&promote_if_h1tag);
# join '', map { $_->[1] } grep { $_->[0] eq 'KEEPER'} @tagged_texts;
# }
def extract_headers(tree):
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if_h1tag,
extender)
return "".join([x[1] for x in tagged_texts if x[0] == "KEEPER"])


# sub promote_if_h1tag {
# my $element = shift;
# if ($element->{_tag} =~ /^h\d+$/) {
# return ['KEEPER', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
def promote_if_h1tag(element, results):
if re.search("^h\d+", element["_tag"]):
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results


# sub promote_if {
# my $is_interesting = shift;
# my $element = shift;
# if ($is_interesting->($element->{_tag}) {
# return ['KEEPER', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
def promote_if(is_interesting, element, results):
if is_interesting(element["_tag"]):
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results


# my @tagged_texts = walk_html($tree,
# sub { ['maybe', $_[0]] },
# sub { promote_if(
# sub { $_[0] eq 'h1'},
# $_[0])
# });
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
lambda y,z: (promote_if((lambda _y: _y == "h1"), y,z)),
extender)


# sub fib {
# my ($month) = @_;
# if ($month < 2) { 1 }
# else {
# fib($month-1) + fib($month-2);
# }
# }
def fib(month):
if month < 2:
return 1
else:
return fib(month-1) + fib(month-2)


# sub find_share {
# my ($target, $treasures) = @_;
# return [] if $target == 0;
# return if $target < 0 || @$treasures == 0;
# my ($first, @rest) = @$treasures;
# my $solution = find_share($target-$first, \@rest);
# return [$first, @$solution] if $solution;
# return find_share($target , \@rest);
# }
def find_share(target, treasures):
if target == 0:
return []
if target < 0 or len(treasures) == 0:
return
first, rest = treasures[0], treasures[1:]
solution = find_share(target-first, rest)
if solution != None:
return [first] + solution
return find_share(target, rest)


# sub partition {
# my $total = 0;
# my $share_2;
# for my $treasure (@_) {
# $total += $treasure;
# }
# my $share_1 = find_share($total/2, [@_]);
# return unless defined $share_1;
# my %in_share_1;
# for my $treasure (@$share_1) {
# ++$in_share_1{$treasure};
# }
# for my $treasure (@_) {
# if ($in_share_1{$treasure}) {
# --$in_share_1{$treasure};
# } else {
# push @$share_2, $treasure;
# }
# }
# return ($share_1, $share_2);
# }
# NOTE: would be more pythonic to use sum, sets()
def partition(treasures):
share_2 = []
total = 0

for treasure in treasures:
total += treasure

share_1 = find_share(total/2, treasures)
if not share_1:
return

in_share_1 = {}
for treasure in share_1:
in_share_1.setdefault(treasure, 0)
in_share_1[treasure] += 1

for treasure in treasures:
if treasure in in_share_1:
in_share_1[treasure] -= 1
else:
share_2.append(treasure)

return share_1, share_2

# a more pythonic way:
def partition_(treasures):
share_1 = find_share(sum(treasures)/2, treasures)
if not share_1:
return

return share_1, list(set(treasures) - set(share_1))

No comments: