Wednesday, April 8, 2009

Higher Order Perl (Python Style) : Chapter 7 - Higher Order Functions And Currying




TOC


### 7.1 Currying


# @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
# \&promote_if_h1tag);
# sub promote_if_h1tag {
# my $element = shift;
# if ($element->{_tag} eq 'h1') {
# return ['KEEPER', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
# sub extract_headers {
# my $tree = shift;
# my @tagged_texts = walk_html($tree, sub { ['MAYBE', $_[0]] },
# \&promote_if_h1tag);
# my @keepers = grep { $_->[0] eq 'KEEPER' } @tagged_texts;
# my @keeper_text = map { $_->[1] } @keepers;
# my $header_text = join '', @keeper_text;
# return $header_text;
# }
def extender(accum, item):
accum.extend(item)
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if_h1tag,
extender)
def promote_if_h1tag(element, results):
if element["_tag"] == "h1":
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results

def extract_headers(tree):
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if_h1tag,
extender)
keepers = [x for x in tagged_texts if x[0] == "KEEPER"]
keeper_text = [x[1] for x in keepers]
header_text = "".join(keeper_text)
return header_text


# sub promote_if {
# my $is_interesting = shift;
# my $element = shift;
# if ($is_interesting->($element->{_tag}) {
# return ['keeper', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
# my @tagged_texts = walk_html($tree,
# sub { ['maybe', $_[0]] },
# sub { promote_if(
# sub { $_[0] eq 'h1' },
# $_[0])
# });
def promote_if(is_interesting, element, results):
if is_interesting(element["_tag"]):
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
lambda y,z: (promote_if((lambda _y: _y == "h1"), y,z)),
extender)



# sub promote_if {
# my $is_interesting = shift;
# return sub {
# my $element = shift;
# if ($is_interesting->($element->{_tag}) {
# return ['keeper', join '', map {$_->[1]} @_];
# } else {
# return @_;
# }
# }
# }
def promote_if(is_interesting):
def _foo(element, results):
if is_interesting(element["_tag"]):
return [["KEEPER", "".join([ _x[1] for _x in results])]]
else:
return results
return _foo


# my @tagged_texts = walk_html($tree,
# sub { ['maybe', $_[0]] },
# promote_if(sub { $_[0] eq 'h1' }),
# );
tagged_texts = walk_html(tree, lambda x: [["MAYBE", x]],
promote_if(lambda y: y == "h1"),
extender)


# sub add2 {
# my ($s, $t) = @_;
# return unless $s && $t;
# node(head($s) + head($t),
# promise { add2(tail($s), tail($t)) });
# }
# sub mul2 {
# my ($s, $t) = @_;
# return unless $s && $t;
# node(head($s) * head($t),
# promise { mul2(tail($s), tail($t)) });
# }
def add2(s,t):
if not (t and s):
return None
return node(head(s) + head(t),
promise(lambda: add2(tail(s),tail(t))))
def mul2(s,t):
if not (s and t):
return None
return node(head(s)*head(t),
promise(lambda: mul2(tail(s),tail(t))))


# sub combine2 {
# my ($s, $t, $op) = @_;
# return unless $s && $t;
# node($op->(head($s), head($t)),
# promise { combine2(tail($s), tail($t), $op) });
# }
def combine2(s,t,op):
if not (s and t):
return None
return node(op(head(s),head(t)),
promise(lambda: combine2(tail(s),tail(t), op)))

# sub add2 { combine2(@_, sub { $_[0] + $_[1] }) }
# sub mul2 { combine2(@_, sub { $_[0] * $_[1] }) }
def add2(s,t):
return combine2(s, t, lambda _s, _t: _s + _t)
def mul2(s,t):
return combine2(s, t, lambda _s, _t: _s * _t)




# sub combine2 {
# my $op = shift;
# return sub {
# my ($s, $t) = @_;
# return unless $s && $t;
# node($op->(head($s), head($t)),
# promise { combine2($op)->(tail($s), tail($t))});
# };
# }
def combine2(op):
def _foo(s, t):
if not (s and t):
return None
return node(op(head(s),head(t)),
promise(lambda: combine2(op(tail(s), tail(t)))))

return _foo


# $add2 = combine2(sub { $_[0] + $_[1] });
# $mul2 = combine2(sub { $_[0] * $_[1] });
add2 = combine2(lambda x,y: x+y)
mul2 = combine2(lambda x,y: x*y)


# my $catstrs = combine2(sub { "$_[0]$_[1]" })->($s, $t);
catstrs = combine2(lambda x,y: "%s%s" % (x,y))(s,t)



# sub scale {
# my $s = shift;
# return sub {
# my $c = shift;
# return if $c == 0;
# transform { $_[0] * $c } $s;
# }
# }
def scale(s):
def _foo(c):
if c == 0:
return None
return transform(lambda x: x*c, s)
return _foo



# sub scale {
# my $c = shift;
# return sub {
# my $s = shift;
# transform { $_[0] * $c } $s;
# }
# }
def scale(c):
def _foo(s):
return transform(lambda x: x*c, s)
return _foo



# sub slope {
# my ($f, $x) = @_;
# my $e = 0.00000095367431640625;
# ($f->($x+$e) - $f->($x-$e)) / (2*$e);
# }
def slope(f, x):
e = 0.00000095367431640625
return (f(x+e) - f(x-e)) / (2*e)



# sub slope {
# my $f = shift;
# my $e = 0.00000095367431640625;
# return sub {
# my $x = shift;
# ($f->($x+$e) - $f->($x-$e)) / (2*$e);
# };
# }
def slope(f):
e = 0.00000095367431640625
def _foo(x):
return (f(x+e) - f(x-e)) / (2*e)
return _foo



# sub slope {
# my $f = shift;
# my $e = 0.00000095367431640625;
# my $d = sub {
# my ($x) = shift;
# ($f->($x+$e) - $f->($x-$e)) / (2*$e);
# };
# return @_ ? $d->(shift) : $d;
# }
def slope(f, _x=None):
e = 0.00000095367431640625
def d(x):
return (f(x+e) - f(x-e)) / (2*e)
return d(_x) if _x != None else d




# sub iterate_function {
# my ($f, $x) = @_;
# my $s;
# $s = node($x, promise { &transform($f, $s) });
# }
def iterate_function(f, x):
s = node(x, promise(lambda: transform(f, s)))
return s


# sub iterate_function {
# my $f = shift;
# return sub {
# my $x = shift;
# my $s;
# $s = node($x, promise { &transform($f, $s) });
# };
# }
def iterate_function(f):
def foo(x):
s = node(x, promise(lambda: transform(f, s)))
return s
return foo


# *upfrom = iterate_function(sub { $_[0] + 1 });
upfrom = iterate_function(lambda x: x + 1)

# *pow2_from = iterate_function(sub { $_[0] * 2 });
pow2_from = iterate_function(lambda x: x * 2)


# sub combine2 {
# my $op = shift;
# return sub {
# my ($s, $t) = @_;
# return unless $s && $t;
# node($op->(head($s), head($t)),
# promise { combine2($op)->(tail($s), tail($t)) });
# };
# }
def combine2(op):
def _foo(s, t):
if not (s and t):
return None
return node(op(head(s),head(t)),
promise(lambda: combine2(op(tail(s), tail(t)))))

return _foo



# sub combine2 {
# my $op = shift;
# my $r;
# $r = sub {
# my ($s, $t) = @_;
# return unless $s && $t;
# node($op->(head($s), head($t)),
# promise { $r->(tail($s), tail($t)) });
# };
# }
def combine2(op):
def r(s,t):
if not (s and t):
return None
return node(op(head(s),head(t)),
promise(lambda: r(tail(s),tail(t))))
return r



### 7.2 Common Higher-Order Functions

# map { $_ * 2 } (1..5); # returns 2, 4, 6, 8, 10
# grep { $_ % 2 == 0 } (1..10); # returns 2, 4, 6, 8, 10
## while map exists in python the more usual idiom
## is to use list comprehensions/iterators
[x*2 for x in range(1,6)]
[x for x in range(1,11) if x % 2 == 0 ]


# sub cmap (&) {
# my $f = shift;
# my $r = sub {
# my @result;
# for (@_) {
# push @result, $f->($_);
# }
# @result;
# };
# return $r;
# }
# sub cgrep (&) {
# my $f = shift;
# my $r = sub {
# my @result;
# for (@_ ) {
# push @result, $_ if $f->($_ );
# }
# @result;
# };
# return $r;
# }
## skipping the more obvious generator solution
def cmap(f):
def r(items):
result = []
for i in items:
result.append(f(i))
return result
return r
def cgrep(f):
def r(items):
result = []
for i in items:
if f(i):
result.append(i)
return result
return r

# $double = cmap { $_ * 2 };
# $find_slashdot = cgrep { $_->{referer} =~ /slashdot/i };
double = cmap(lambda x: x*2)
find_slashdot = cgrep(lambda x: "slashdot" in x["referer"].lower())



# sub cmap (&;@) {
# my $f = shift;
# my $r = sub {
# my @result;
# for (@_) {
# push @result, $f->($_);
# }
# @result;
# };
# return @_ ? $r->(@_) : $r;
# }
def cmap(f, lst=[]):
def r(_lst):
result = []
for item in _lst:
result.append(f(item))
return result
return r(lst) if lst != None else r


# @doubles = cmap { $_ * 2 } (1..5);
# @evens = cgrep { $_ % 2 == 0 } (1..10);
doubles = cmap(lambda x: x*2, range(1,6))
evens = cgrep(lambda x: x % 2 == 0, range(1,11))



# sub some_curried_function {
# my $first_arg = shift;
# my $r = sub {
# ...
# };
# return @_ ? $r->(@_) : $r;
# }
def some_curried_function(first_arg, lst=None):
def r(...):
...
return r(lst) if lst != None else r



# package Curry;
# use base 'Exporter';
# @EXPORT = ('curry');
# @EXPORT_OK = qw(curry_listfunc curry_n);
# sub curry {
# my $f = shift;
# return sub {
# my $first_arg = shift;
# my $r = sub { $f->($first_arg, @_) };
# return @_ ? $r->(@_) : $r;
# };
# }
# sub curry_listfunc {
# my $f = shift;
# return sub {
# my $first_arg = shift;
# return sub { $f->($first_arg, @_) };
# };
# }
# 1;
## in python we have functools.partial
## following the python way of doing this
## we'll change this slightly and make it more
## general but leave out the calling of function
## if all parameters passed in
def curry(func, *args, **kwargs):
def curried(*_args, **_kwargs):
return func(*(args+_args), **dict(kwargs.items() + _kwargs.items()))
return curried

# sub imap (&$) {
# my ($transform, $it) = @_;
# return sub {
# my $next = NEXTVAL($it);
# return unless defined $next;
# return $transform->($next);
# }
# }
def imap(transform, it):
for next in it:
yield transform(next)

# my $doubles_iterator = imap { $_[0] * 2 } $it;
doubles_iterator = imap(lambda x: x*2, it)

# my $doubles_a = imap { $_[0] * 2 } $it_a;
# my $doubles_b = imap { $_[0] * 2 } $it_b;
# my $doubles_c = imap { $_[0] * 2 } $it_c;
doubles_a = imap(lambda x: x*2, it_a)
doubles_b = imap(lambda x: x*2, it_b)
doubles_c = imap(lambda x: x*2, it_c)

# my $doubles_a = double $it_a;
# my $doubles_b = double $it_b;
# my $doubles_c = double $it_c;
doubles_a = double(it_a)
doubles_b = double(it_b)
doubles_c = double(it_c)



# sub curry {
# my $f = shift;
# return sub (&;@) {
# my $first_arg = shift;
# my $r = sub { $f->($first_arg, @_ ) };
# return @_ ? $r->(@_ ) : $r;
# };
# }
## this doesn't correspond with the python
## way. (skipping for now)

## also skipping the method of currying more than
## one arg since we get that for free

# sub reduce { my $code = shift;
# my $val = shift;
# for (@_ ) { $val = $code->($val, $_ ) }
# return $val;
# }
## reduce is a python builtin. in python 3.x it will
## live in functools


# reduce(sub { $_[0] + $_[1] }, @VALUES) == sum(@VALUES)
reduce(lambda x,y: x+y, VALUES) == sum(VALUES)


# reduce(sub { $_[0] > $_[1] ? $_[0] : $_[1] }, @VALUES) == max(@VALUES)
reduce(lambda x,y: x if x > y else y, VALUES) == max(VALUES)


# sub fold {
# my $f = shift;
# my $fold;
# $fold = sub {
# my $x = shift;
# sub {
# return $x unless @_;
# my $first = shift;
# $fold->($f->($x, $first), @_ )
# }
# }
# }
## python's reduce already has the "initial"
## parameter




# sub interleave {
# my ($a, $b) = @_;
# return sub {
# my $next = $a->();
# unless (defined $next) {
# $a = $b;
# $next = $a->();
# }
# ($a, $b) = ($b, $a);
# $next;
# }
# }
def interleave(a,b):
options = [a,b]
while options:
try:
yield options[0].next()
options = options[1:] + options[:1]
except StopIteration:
options.pop(0)


# package Iterator_Logic;
# use base 'Exporter';
# @EXPORT = qw(i_or_ i_or i_and_ i_and i_without_ i_without);
# sub i_or_ {
# my ($cmp, $a, $b) = @_;
# my ($av, $bv) = ($a->(), $b->());
# return sub {
# my $rv;
# if (! defined $av && ! defined $bv) { return }
# elsif (! defined $av) { $rv = $bv; $bv = $b->() }
# elsif (! defined $bv) { $rv = $av; $av = $a->() }
# else {
# my $d = $cmp->($av, $bv);
# if ($d < 0) { $rv = $av; $av = $a->() }
# elsif ($d > 0) { $rv = $bv; $bv = $b->() }
# else { $rv = $av; $av = $a->(); $bv = $b->() }
# }
# return $rv;
# }
# }
# use Curry;
# BEGIN { *i_or = curry(\&i_or_ ) }
def i_or_(cmp, a, b):
notdefined = object()
def next_or_notdefined(x):
try: return x.next()
except StopIteration: return notdefined

av, bv = next_or_notdefined(a), next_or_notdefined(b)

while 1:
if av == notdefined and bv == notdefined:
return
elif av == notdefined:
yield bv
bv = next_or_notdefined(b)
elif bv == notdefined:
yield av
av = next_or_notdefined(a)
else:
d = cmp(av,bv)
if d < 0:
yield av
av = next_or_notdefined(a)
elif d > 0:
yield bv
bv = next_or_notdefined(b)
else:
yield av
av = next_or_notdefined(a)
bv = next_or_notdefined(b)

# sub i_and_ {
# my ($cmp, $a, $b) = @_;
# my ($av, $bv) = ($a->(), $b->());
# return sub {
# my $d;
# until (! defined $av || ! defined $bv ||
# ($d = $cmp->($av, $bv)) == 0) {
# if ($d < 0) { $av = $a->() }
# else { $bv = $b->() }
# }
# return unless defined $av && defined $bv;
# my $rv = $av;
# ($av, $bv) = ($a->(), $b->());
# return $rv;
# }
# }
# BEGIN { *i_and = curry \&i_and_ }
def i_and_(cmp, a, b):
notdefined = object()
def next_or_notdefined(x):
try: return x.next()
except StopIteration: return notdefined

av, bv = next_or_notdefined(a), next_or_notdefined(b)

while 1:
while not (av != notdefined or bv != notdefined
or cmp(av,bv) == 0):
if cmp(av,bv) < 0:
av = next_or_notdefined(a)
else:
bv = next_or_notdefined(b)
if av == notdefined and bv == notdefined:
return
rv = av
av, bv = next_or_notdefined(a), next_or_notdefined(b)
yield rv


### 7.4 Databases

# package FlatDB_Compose;
# use base 'FlatDB';
# use base 'Exporter';
# @EXPORT_OK = qw(query_or query_and query_not query_without);
# use Iterator_Logic;
# # usage: $dbh->query(fieldname, value)
# # returns all records for which (fieldname) matches (value)
# sub query {
# my $self = shift;
# my ($field, $value) = @_;
# my $fieldnum = $self->{FIELDNUM}{uc $field};
# return unless defined $fieldnum;
# my $fh = $self->{FH};
# seek $fh, 0, 0;
# <$fh>; # discard header line
# my $position = tell $fh;
# my $recno = 0;
# return sub {
# local $_;
# seek $fh, $position, 0;
# while (<$fh>) {
# chomp;
# $recno++;
# $position = tell $fh;
# my @fields = split $self->{FIELDSEP};
# my $fieldval = $fields[$fieldnum];
# return [$recno, @fields] if $fieldval eq $value;
# }
# return;
# };
# }
def query(self, field, value):
fieldnum = self.fieldnum.get(field.upper())
if fieldnum == None:
return
fh = self.fh
fh.seek(0)
fh.readline() # discard schema line
recno = 0
while 1:
line = fh.readline()
recno += 1
if not line:
break
position = fh.tell()
fields = line.split(FlatDB.FIELDSEP)
fieldval = fields[fieldnum]
if fieldval == value:
yield recno, line.strip()
fh.seek(position)


# sub callbackquery {
# my $self = shift;
# my $is_interesting = shift;
# my $fh = $self->{FH};
# seek $fh, 0, SEEK_SET;
# <$fh>; # discard header line
# my $position = tell $fh;
# my $recno = 0;
# return sub {
# local $_;
# seek $fh, $position, SEEK_SET;
# while (<$fh>) {
# $position = tell $fh;
# chomp;
# $recno++;
# my %F;
# my @fieldnames = @{$self->{FIELDS}};
# my @fields = split $self->{FIELDSEP};
# for (0 .. $#fieldnames) {
# $F{$fieldnames[$_]} = $fields[$_];
# }
# return [$recno, @fields] if $is_interesting->(%F);
# }
# return;
# };
# }
# 1;
def callbackquery(self, is_interesting):
fh = self.fh
fh.seek(0)
fh.readline() # discard schema line
recno = 0
while 1:
line = fh.readline()
recno += 1
if not line:
break
position = fh.tell()
line = line.strip()
fieldnames = self.field
fields = line.split(FlatDB.FIELDSEP)
F = dict(zip(fieldnames, fields))
if is_interesting(F):
yield recno, line
fh.seek(position)


# # $a but not $b
# sub i_without_ {
# my ($cmp, $a, $b) = @_;
# my ($av, $bv) = ($a->(), $b->());
# return sub {
# while (defined $av) {
# my $d;
# while (defined $bv && ($d = $cmp->($av, $bv)) > 0) {
# $bv = $b->();
# }
# if ( ! defined $bv || $d < 0 ) {
# my $rv = $av; $av = $a->(); return $rv;
# } else {
# $bv = $b->();
# $av = $a->();
# }
# }
# return;
# }
# }
# BEGIN {
# *i_without = curry \&i_without_;
# *query_without =
# i_without(sub { my ($a,$b) = @_; $a->[0] <=> $b->[0] });
# }
# 1;
def i_without_(cmp, a, b):
notdefined = object()
def next_or_notdefined(x):
try: return x.next()
except StopIteration: return notdefined

av, bv = next_or_notdefined(a), next_or_notdefined(b)

while av != notdefined:
d = cmp(av,bv)
while bv != notdefined and cmp(av,bv) > 0:
bv = next_or_notdefined(b)
d = cmp(av,bv)
if bv == notdefined or d < 0:
rv = av
av = next_or_notdefined(a)
yield rv
else:
bv = next_or_notdefined(b)
av = next_or_notdefined(a)
query_without = functools.partial(i_without_, cmp)


# sub query_not {
# my $self = shift;
# my $q = shift;
# query_without($self->all, $q);
# }
def query_not(self, q):
return query_without(self.all(), q)


### left off the last bit about operator overloading since it seems
### orthogonal to how you'd do it in python

Saturday, April 4, 2009

Smalltalk for a Year - Status Report 2

So my goal was to do monthly updates on my smalltalk progress during this year. For January I was going strong. Learning my way around, getting comfortable with syntax deciding what would be the best way to spend my energies, getting over the newness/alien-ness.

And then sometime in February I started losing momentum. And then into March a complete and utter stop. I even forgot I had this goal of learning smalltalk. So March became the month of figuring out why things had gone so badly and trying to pick myself up again.

There were a combination of factors that contributed to my akrasia (hey, it's a word!). Partly this was due to me being a little under the weather and trying to be the kind of person who actually listens to their body and goes to sleep when they are tired. In the middle of this I got bogged down in a seaside tutorial. At some point the code I typed in to follow along with the example wasn't working the way it should. I kept debugging this and that but looking at the same problem day after day when you are tired and new to a language is a bit of a lethal combination.

And then at some point hibernate on my window's laptop came up as a blue screen of death and I forgot to reopen the tutorial and my squeak image to remind me to take a look. At this point my amnesia was pretty much total.

Another thing I realized is that learning smalltalk *and* a web framework at the same time is a lot to take in. Especially if you don't really believe that you are going to use the web framework day to day.

I'm actually really interested in seaside. It seems fascinating. I'm a little distressed that I've developed a streak of practicality that makes it hard to focus on things that aren't directly useful to me. That's just not the geek way.

In any case March was the month of getting back on the horse. So here is my plan now. There are four things I'm going to work on in one order or another that I believe will be able to keep my attention.

  • convert Collective Intelligence code to smalltalk. I've already read this through once and did all the examples in python. i find that converting examples to a new language really makes it hard to skip over parts that you don't really understand.
  • read Functional Pattern System for Object-Oriented Design and implement in smalltalk. I've been really interested in the streams data model and how it solves the hamming problem. I've been surprised not to see an easily found solution in smalltalk. i'm hoping to be able to develop a solution by reading this book.
  • investigate SPY. I'm keenly interested in pypy and virtual machines. also i never really seem to make progress on learning a language until i start peeling back the curtains to see how things work under the covers
  • read thru squeak soup code. I really like using the python version of this and this is a sort of problem I'm interested in and feel it would be useful to understand how it is solved.
One other thing. I'm purposely changing my learning slot from evening to morning. I've traditionally been more likely to keep promises to myself in the morning than the evening.

OK, let's get that momentum going again.