Friday, January 23, 2009

Higher Order Perl (Python Style) : Chapter 3 - Caching and Memoization


### 3.1 Caching Fixes Recursion

# sub RGB_to_CMYK {
# my ($r, $g, $b) = @_;
# my ($c, $m, $y) = (255-$r, 255-$g, 255-$b);
# my $k = $c < $m ? ($c < $y ? $c : $y)
# : ($m < $y ? $m : $y); # Minimum
# for ($c, $m, $y) { $_ -= $k }
# [$c, $m, $y, $k];
# }
def rgb_to_cmyk(r,g,b):
c, m, y = 255-r, 255-g, 255-b
k = min([c,m,y])
c, m, y = c - k, m - k, y - k
return c, m, y, k

# my %cache;
# sub RGB_to_CMYK {
# my ($r, $g, $b) = @_;
# my $key = join ',', $r, $g, $b;
# return $cache{$key} if exists $cache{$key};
# my ($c, $m, $y) = (255-$r, 255-$g, 255-$b);
# my $k = $c < $m ? ($c < $y ? $c : $y)
# : ($m < $y ? $m : $y); # Minimum
# for ($c, $m, $y) { $_ -= $k }
# return $cache{$key} = [$c, $m, $y, $k];
# }
CACHE = {}
def rgb_to_cmyk(r,g,b):
if (r,g,b) not in CACHE:
c, m, y = 255-r, 255-g, 255-b
k = min([c,m,y])
c, m, y = c - k, m - k, y - k
CACHE[(r,g,b)] = (c,m,y,k)
return CACHE[(r,g,b)]


# Compute the number of pairs of rabbits alive in month n
# sub fib {
# my ($month) = @_;
# if ($month < 2) { 1 }
# else {
# fib($month-1) + fib($month-2);
# }
# }
def fib(month):
if month < 2:
return 1
else:
return fib(month - 1) + fib(month - 2)


### 3.2 Inline Caching

# # Compute the number of pairs of rabbits alive in month n
# { my %cache;
# sub fib {
# my ($month) = @_;
# unless (exists $cache{$month}) {
# if ($month < 2) { $cache{$month} = 1 }
# else {
# $cache{$month} = fib($month-1) + fib($month-2);
# }
# }
# return $cache{$month};
# }
# }
# NOTE: we include the function itself as part of
# the hash key since python can't do the
# scoping technique above w/o hackery (AFAIK).
# Of course the more natural python way
# would probably be just to use a decorator
CACHE = {}
def fib(month):
if (fib, month) not in CACHE:
if month < 2:
CACHE[(fib, month)] = 1
else:
CACHE[(fib, month)] = fib(month - 1) + fib(month - 2)
return CACHE[(fib, month)]


# sub fib {
# my %cache;
# ...
# }
def fib(month):
CACHE = {}
...

### 3.3 Good Ideas

# sub some_function {
# $result = some computation involving @_;
# return $result;
# }
def some_function(*x):
result = some computation involving x
return result

# { my %cache;
# sub some_function_with_caching {
# my $key = join ',', @_;
# return $cache{$key} if exists $cache{$key};
# $result = the same computation involving @_;
# return $cache{$key} = $result;
# }
# }
CACHE = {}
def some_function_with_caching(*x):
key = (some_function_with_caching, x)
if key not in CACHE:
CACHE[key] = the same computation involving *x
return CACHE[key]


### 3.4 Memoization

# use Memoize;
# memoize 'fib';
# # Compute the number of pairs of rabbits alive in month n
# sub fib {
# my ($month) = @_;
# if ($month < 2) { 1 }
# else {
# fib($month-1) + fib($month-2);
# }
# }
# Just for sake of argument let's pretend we have a "memoize"
# module that provides a memoize decorator
@memoize
def fib(month):
if month < 2:
return 1
else:
return fib(month - 1) + fib(month - 2)

### 3.5 The Memoize Module

# sub memoize {
# my ($func) = @_;
# my %cache;
# my $stub = sub {
# my $key = join ',', @_;
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# };
# return $stub;
# }

# One reasonable way to do this in python is as a decorator. To keep
# things simple we'll leave out **kwargs since we can't (simply) use
# dicts as parts of keys. We also assume for now that args contains
# only things that can be in a dict key
def memoize(func):
CACHE = {}
def memoized_func(*args):
if args not in CACHE:
CACHE[args] = func(*args)
return CACHE[args]
return memoized_func

# NOTE: running the above with fib actually gave
# "RuntimeError: maximum recursion depth exceeded"
# for month = 1000. But if you do some smaller
# values first to "seed" it it will handle the
# larger values.




#$fastfib = memoize(\&fib);
@memoize
def fib(month):
....
# or
# fastfib = memoize(fib)


# sub make_counter {
# my $n = shift;
# return sub { print "n is ", $n++ };
# }
# my $x = make_counter(7);
# my $y = make_counter(20);

# We cheat a little here since python's scoping
# won't let's us work with the n directly
def make_counter(n):
x = [n]
def counter():
print "n is", x[0]
x[0] += 1
return counter
x = make_counter(7)
y = make_counter(20)


### 3.6 Caveats

# use Memoize;
# sub iota {
# my $n = shift;
# return [1 .. $n];
# }
# memoize 'iota';
# $i10 = iota(10);
# $j10 = iota(10);
# pop @$i10;
# print @$j10;
@memoize
def iota(n):
return range(n)
i10 = iota(10)
j10 = iota(10)
i10.pop()
print j10


# package Octopus;
# sub new {
# my ($class, %args) = @_;
# $args{tentacles} = 8;
# bless \%args => $class;
# }

# sub name {
# my $self = shift;
# if (@_) { $self->{name} = shift }
# $self->{name};
# }

# my $junko = Octopus->new(favorite_food => "crab cakes");
# $junko->name("Junko");
# my $fenchurch = Octopus->new(favorite_food => "crab cakes");
# $fenchurch->name("Fenchurch");

# # This prints "Fenchurch" -- oops!
# print "The name of the FIRST octopus is ", $junko->name, "\n";
class Octopus(object):
def __init__(self, **kwargs):
self.tentacles = 8
self.__dict__.update(kwargs)

def name(self, new_name=None):
if new_name != None:
self.name = new_name
return self.name

# OK, this one has me stumped. The text says
# that "new" was memoized, but I'm not seeing it
# Either a typo on the books part or a thinko on mine
# But the lesson about caching mutable items is well taken


#@result = sort { -M $a <=> -M $b } @files;
result = sorted(files, key=(lambda x: os.path.getmtime(x)))


### 3.7 Key Generation

#my $key = join ',', @_;
key = ",".join([str(x) for x in key_list])
# or just use a tuple and avoid problems in the first place
key = tuple(key_list)


# my @args = @_;
# s/([\\,])/\\$1/g for @args;
# my $key = join ",", @args;
key = ",".join([str(x).replace(",","\,").replace("\\","\\\\") for x in key_list])


# sub memoize {
# my ($func, $keygen) = @_;
# my %cache;
# my $stub = sub {
# my $key = $keygen ? $keygen->(@_) : join ',', @_;
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# };
# return $stub;
# }
# There are other ways to do this, but we want
# to stay close to the perl version. Now
# memoize will need to be invoked as
#@memoize() # need the "()" here
#@memoize(keygen_func)
def memoize(keygen=None):
CACHE = {}
def outer_func(func):
def inner_func(*args):
key = args
if keygen:
key = keygen(args)
if key not in CACHE:
CACHE[key] = func(*args)
return CACHE[key]

return inner_func
return outer_func


# sub memoize {


# my ($func, $keygen) = @_;
# my %cache;
# my $stub = $keygen ?
# sub { my $key = $keygen->(@_);
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# }
# :
# sub { my $key = join ',', @_;
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# }
# ;
# return $stub;
# }
def memoize(keygen=None):
CACHE = {}
def outer_func(func):
def inner_func_no_keygen(*args):
key = args
if key not in CACHE:
CACHE[key] = func(*args)
return CACHE[key]

def inner_func_with_keygen(*args):
key = keygen(args)
if key not in CACHE:
CACHE[key] = func(*args)
return CACHE[key]

return keygen and inner_func_with_keygen or inner_func_no_keygen
return outer_func


# $memoized = memoize(\&fib, q{my @args = @_;
# s/([\\,])/\\$1/g for @args;
# join ',', @args;
# });

# sub memoize {
# my ($func, $keygen) = @_;
# $keygen ||= q{join ',', @_};
# my %cache;
# my $newcode = q{
# sub { my $key = do { KEYGEN };
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# }
# };
# $newcode =~ s/KEYGEN/$keygen/g;
# return eval $newcode;
# }

# $keygen ||= 'join \',\', @_';

# my $newcode = "
# sub { my \$key = do { $keygen };
# \$cache{\$key} = \$func->(\@_) unless exists \$cache{\$key};
# return \$cache{\$key};
# }
# ";


# sub memoize {
# my ($func, $keygen) = @_;
# my $keyfunc;
# if ($keygen eq '') {
# $keygen = q{join ',', @_}
# } elsif (UNIVERSAL::isa($keygen, 'CODE')) {
# $keyfunc = $keygen;
# $keygen = q{$keyfunc->(@_)};
# }
# my %cache;
# my $newcode = q{
# sub { my $key = do { KEYGEN };
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# }
# };
# $newcode = ̃ s/KEYGEN/$keygen/g;
# return eval $newcode;
# }

# if (ref($keygen) eq 'CODE') { ... }

"""
UNCLE! This seems too far off the path for a sane python translation.
Actually I can think of a way to do it, you just wouldn't, so unless
there is a great public outcry, I will just skip trying to force this
down the python's throat.
"""

# sub example {
# my %args = @_;
# $args{A} = 32 unless defined $args{A};
# $args{B} = 17 unless defined $args{B};
# # ...
# }
def example(**args):
args.setdefault("A", 32)
args.setdefault("B", 17)
#

# example(C => 99);
# example(C => 99, A => 32);
# example(A => 32, C => 99);
# example(B => 17, C => 99);
# example(C => 99, B => 17);
# example(A => 32, C => 99, B => 17);
# example(B => 17, A => 32, C => 99);
# (etc.)

example(C=99)
example(C=99, A=32)
example(A=32, C=99)
example(B=17, C=99)
example(C=99, B=17)
example(A=32, C=99, B=17)
example(B=17, A=32, C=99)
(etc.)

# NOTE: the problem in python is that
# we just can't use dicts as a hash
# otherwise this would be a no brainer


# sub {
# my %h = @_;
# $h{A} = 32 unless defined $h{A};
# $h{B} = 17 unless defined $h{B};
# join ",", @h{'A','B','C'};
# }
# we give this function a name so that we don't
# have to work around python's lambda limitations
def _keygen(a, h):
h.setdefault("A", 32)
h.setdefault("B", 17)
# a tuple key is probably more natural
return tuple([x[1] for x in sorted(h.items())])
# but we can make a string just as simply
# return ','.join([str(x[1]) for x in sorted(args.items())])

# keygen only has to work over the set of expected values so we can go ahead and
# do something like:
# have the key be a tuple of (args, kwargs)
# where kwargs has been adapted to a tuple of key,value pair tuples
# BUT now our keygen needs to handle *Both* types of inputs
def memoize(keygen=None):
CACHE = {}
def outer_func(func):
def inner_func(*args, **kwargs):
kwargs_ = tuple(sorted(kwargs.items()))
key = (args, kwargs_)
if keygen:
key = keygen(args, kwargs)
if key not in CACHE:
print "updating cache:", key
CACHE[key] = func(*args, **kwargs)
return CACHE[key]

return inner_func
return outer_func


# sub example {
# my ($a, $b, $c) = @_;
# $a = 32 unless defined $a;
# $b = 17 unless defined $b;
# # more calculation here ...
# }
def example(a=32, b=17,c=None):
# more calculation here ...

# my ($a, $b, $c) = @_;
# $a = 32 unless defined $a;
# $b = 17 unless defined $b;
# join ',', $a, $b, $c;

# first three lines handled by machinery of argument passing in python
return ",".join([str(x) for x in (a,b,c)])


# sub { my $key = do { $_[0] = 32 unless defined $_[0];
# $_[1] = 17 unless defined $_[1];
# join ',', @_;
# };
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# }

# Hmm... very likely I'm missing a sublety here but
# is seems that python's default args already handles
# this case. Plus the fact that (if I'm understanding
# correctly), python doesn't map well to the "do { }"
# syntax. I'll punt for now.


# sub set_to_57 {
# $_[0] = 57;
# }
# my $x = 119;
# set_to_57($x);

# you can't take advantage of this (or get bit by, depending
# on your perspective) in python, but cause the args are pass
# by value.


# sub safe_function {
# my ($n) = @_;
# $n = 57; # does *not* set $x to 57
# }
# my $x = 119;
# safe_function($x);
def safe_function(n):
n = 57
x = 119
safe_function(x)


# memoize(\&example, q{
# my ($a, $b, $c) = @_;
# $a = 32 unless defined $a;
# $b = 17 unless defined $b;
# @_ = ($a, $b, $c); # line 5
# join ',', @_;
# });
# We avoid sending the keygen function as a string
# since it is too unpythonic.
# Here we are assuming that the arguments are positional
# and "None" if not available. Usually we would use
# key word args in this situation
def _keygen(args, kwargs):
print "args=",args,"kwargs=",kwargs
a = args[0] == None and 32 or args[0]
b = args[1] == None and 17 or args[1]
c = args[2]
return (a,b,c)
# or return ",".join([str(x) for x in (a,b,c)])

@memoize(_keygen)
def exercise(*args):
a,b,c = (args + [None]*3)[:3]
...


# sub is_in {
# my ($needle, $haystack) = @_;
# for my $item (@$haystack) {
# return 1 if $item == $needle;
# }
# return;
# }
def is_in(needle, haystack):
for item in haystack:
if item == needle:
return True
return False

# if (is_in($my_id, \@employee_ids)) { ... }
if is_in(my_id, employee_ids):
...

#sub { join ",", $_[0], @{$_[1]} }
# In python the most obvious solution here
# is again to just use tuple()


# sub find_share {
# my ($target, $treasures) = @_;
# return [] if $target == 0;
# return if $target < 0 || @$treasures == 0;
# my ($first, @rest) = @$treasures;
# my $solution = find_share($target-$first, \@rest);
# return [$first, @$solution] if $solution;
# return find_share($target , \@rest);
# }


def find_share(target, treasures):
if target == 0:
return []
if target < 0 or len(treasures) == 0:
return
first, rest = treasures[0], treasures[1:]
solution = find_share(target-first, rest)
if solution != None:
return [first] + solution
return find_share(target, rest)



# sub delivery_charge {
# my ($quantity_ordered) = @_;
# my ($hour, $day_of_week) = (localtime)[2,6];
# # perform complex computation involving $weight, $gross_cost,
# # $hour, $day_of_week, and $quantity_ordered
# # ...
# return $delivery_charge;
# }
import time
def delivery_charge(quantity_ordered):
localtime = time.localtime()
hour, day_of_week = localtime[2], localtime[6]
# perform complex computation involving $weight, $gross_cost,
# $hour, $day_of_week, and $quantity_ordered
# ...
return delivery_charge


# sub delivery_charge_key {
# join ',', @_, (localtime)[2,6];
# }
def delivery_charge_key(quantity_ordered):
localtime = time.localtime()
return ",".join([str(x) for x in (quantity_ordered,localtime[2], localtime[6])])
## or just return (quantity_ordered,localtime[2], localtime[6])


# sub delivery_charge_key {
# my ($hour, $day_of_week) = (localtime)[2,6];
# my $weekend = $day_of_week == 0 || $day_of_week == 6;
# join ',', @_, $hour, $weekend;
# }
def delivery_charge_key(quantity_ordered):
localtime = time.localtime()
hour, day_of_week = localtime[2], localtime[6]
weekend = day_of_week in (0,6)
return ",".join([str(x) for x in (quantity_ordered, hour, weekend)])
## or just return (quantity_ordered, hour, weekend)


### 3.8 Caching in Object Methods


# package Investor;
# # Compute total amount currently invested
# sub total {
# my $self = shift;
# # ... complex computation performed here ...
# return $total;
# }
class Investor(object):
def total(self):
# ... complex computation performed here ...
return total

# # Compute total amount currently invested
# { my %cache;
# sub total {
# my $self = shift;
# return $cache{$self} if exists $cache{$self};
# # ... complex computation performed here ...
# return $cache{$self} = $total;
# }
# }
# One way to simulate this in python would be to
# have a cache "class" variable
class Investor(object):
_cache = {}
def total(self):
# ... complex computation performed here ...\
key = (self, Investor.total)
if key not in Investor._cache:
# ... complex computation performed here ...
Investor._cache[key] = total
return Investor._cache[key]


# # here 90,000 is returned from the cache
# $old_total = $old_object->total();
# undef $old_object;
# $new_object = Investor->new();
# $new_total = $new_object->total();
old_total = old_object.total()
del old_object
new_object = Investor()
new_total = new_object.total()

# NOTE: in addition to manually invoked "destroy"
# method recommended in the text, we could
# also have __del__ method, though that might not be
# reliable. Also could just stick in a unix timestamp
# as part of the key so new objects with same memory location
# won't be confusing


# # Compute total amount currently invested
# sub total {
# my $self = shift;
# return $self->{cached_total} if exists $self->{cached_total};
# # ... complex computation performed here ...
# return $self->{cached_total} = $total;
# }
def total(self):
if self.cached_total == None:
# ... complex computation performed here ...
self.cached_total = total
return self.cached_total

# # Compute total amount currently invested
# { my %cache;
# sub total {
# my $self = shift;
# return $cache{$self} if exists $cache{$self};
# # ... complex computation performed here ...
# return $cache{$self} = $total;
# }
# sub expire_total {
# my $self = shift;
# delete $cache{$self};
# }
# }
# sub invest {
# my ($self, $amount, ...) = @_;
# $self->expire_total;
# ...
# }
class Investor(object):
_cache = {}
def total(self):
# ... complex computation performed here ...\
key = (self, Investor.total)
if key not in Investor._cache:
# ... complex computation performed here ...
Investor._cache[key] = total
return Investor._cache[key]


def expire_total(self):
key = (self, Investor.total)
del Investor._cache[key]

def invest(self, ammount, ...):
self.expire_total()
...


# # Compute total amount currently invested
# sub total {
# my $self = shift;
# return $self->{cached_total} if exists $self->{cached_total};
# # ... complex computation performed here ...
# return $self->{cached_total} = $total;
# }
# sub invest {
# my ($self, $amount, ...) = @_;
# delete $self->{cached_total};
# ...
# }

def total(self):
if self.cached_total == None:
# ... complex computation performed here ...
self.cached_total = total
return self.cached_total

def invest(self, amount, ...):
self.cached_total = None
...


# sub memoize_method {
# my ($method, $key) = @_;
# return sub {
# my $self = shift;
# return $self->{$key} if exists $self->{$key};
# return $self->{$key} = $method->($self, @_);
# };
# }
# As a decorator
def memoize_method(key):
def outter(method):
def inner(self):
if self.__dict__.get(key) == None:
print "cache miss:", method, key, self
self.__dict__[key] = method(self)
return self.__dict__[key]
return inner
return outter


#*Investor::total = memoize_method(\&Investor::total, 'cached_total');
#$investor_bob->total;

@memoize_method("cached_total")
def total(self):
...

investor_bob.total()


# $memoized_total = memoize_method(\&Investor::total, 'cached_total');
# $investor_bob->$memoized_total;

# I suppose you could also use the pre-decorator syntax:

def total(self):
...
total = memoize_method("cached_total")(total)

investor_bob.total()




### 3.9 Persistent Caches


# use DB_File;
# sub memoize {
# my ($func, $keygen, $file) = @_;
# my %cache;
# if (defined $file) {
# tie %cache => 'DB_File', $file, O_RDWR|O_CREAT, 0666
# or die "Couldn’t access cache file $file: $!; aborting";
# }
# my $stub = sub {
# my $key = $keygen ? $keygen->(@_) : join ',', @_;
# $cache{$key} = $func->(@_) unless exists $cache{$key};
# return $cache{$key};
# };
# return $stub;
# }
import anydbm

def memoize(keygen=None, cache_file=None):
if cache_file:
cache = anydbm.open(cache_file, 'c')
else:
cache = {}
def outer_func(func):
def inner_func(*args):
key = str(args)
if keygen:
key = keygen(args)
if key not in cache:
cache[key] = func(*args)
return cache[key]

return inner_func
return outer_func



# sub memoize {
# my ($func, $keygen, $cache) = @_;
# $cache = {} unless defined $cache;
# my $stub = sub {
# my $key = $keygen ? $keygen->(@_) : join ',', @_;
# $cache->{$key} = $func->(@_) unless exists $cache->{$key};
# return $cache->{$key};
# };
# return $stub;
# }
def memoize(keygen=None, cache=None):
if cache == None:
cache = {}
def outer_func(func):
def inner_func(*args):
key = str(args)
if keygen:
key = keygen(args)
if key not in cache:
cache[key] = func(*args)
return cache[key]

return inner_func
return outer_func

### 3.10 Alternatives To Memoization

# @sorted_numbers = sort { $a <=> $b } @numbers;
# This is default behavior for sort
sorted_numbers = sorted(numbers)



# @sorted_numbers = sort numerically @numbers;
# sub numerically { $a <=> $b }
sorted_numbers = sorted(numbers, cmp=numerically)
def numerically(a,b):
return cmp(a,b)



# @sorted_dates = sort chronologically @dates;

# %m2n =
# ( jan => 1, feb => 2, mar => 3,
# apr => 4, may => 5, jun => 6,
# jul => 7, aug => 8, sep => 9,
# oct => 10, nov => 11, dec => 12, );

# sub chronologically {
# my ($am, $ad, $ay) =
# ($a =~ /(\w{3}) (\d+), (\d+)/);
# my ($bm, $bd, $by) =
# ($b =~ /(\w{3}) (\d+), (\d+)/);
# $ay <=> $by
# || $m2n{lc $am} <=> $m2n{lc $bm}
# || $ad <=> $bd;
# }

sorted_dates = sorted(dates, chronologically)

m2n = {
"jan" : 1, "feb" : 2, "mar" : 3,
"apr" : 4, "may" : 5, "jun" : 6,
"jul" : 7, "aug" : 8, "sep" : 9,
"oct" : 10, "nov" : 11, "dec" : 12
}

def chronologically(a,b):
am, ad, ay = re.search("(\w{3}) (\d+), (\d+)", a).groups()
bm, bd, by = re.search("(\w{3}) (\d+), (\d+)", b).groups()

return (cmp(int(ay), int(by)) or
cmp(m2n[am.lower()], m2n[bm.lower()]) or
cmp(int(ad), int(bd)))

# ## or more simply
# return cmp((int(ay), m2n[am.lower()], int(ad)),
# (int(by), m2n[bm.lower()], int(bd)))



# @sorted_dates = sort chronologically @dates;
# %m2n =
# ( jan => 1, feb => 2, mar => 3,
# apr => 4, may => 5, jun => 6,
# jul => 7, aug => 8, sep => 9,
# oct => 10, nov => 11, dec => 12, );
# sub chronologically {
# my ($am, $ad, $ay) = split_date($a);
# my ($bm, $bd, $by) = split_date($b);
# $ay <=> $by
# || $m2n{lc $am} <=> $m2n{lc $bm}
# || $ad <=> $bd;
# }
# sub split_date {
# $_[0] =~ /(\w{3}) (\d+), (\d+)/;
# }


sorted_dates = sorted(dates, chronologically)

m2n = {
"jan" : 1, "feb" : 2, "mar" : 3,
"apr" : 4, "may" : 5, "jun" : 6,
"jul" : 7, "aug" : 8, "sep" : 9,
"oct" : 10, "nov" : 11, "dec" : 12
}

def chronologically(a,b):
am, ad, ay = split_date(a)
bm, bd, by = split_date(b)

return (cmp(int(ay), int(by)) or
cmp(m2n[am.lower()], m2n[bm.lower()]) or
cmp(int(ad), int(bd)))

def split_date(dt):
return re.search("(\w{3}) (\d+), (\d+)", dt).groups()



@sorted_dates = sort chronologically @dates; COD E L IB RARY
chrono-3
# %m2n =
# ( jan => 1, feb => 2, mar => 3,
# apr => 4, may => 5, jun => 6,
# jul => 7, aug => 8, sep => 9,
# oct => 10, nov => 11, dec => 12, );

# sub chronologically {
# date_to_string($a) cmp date_to_string($b)
# }

# sub date_to_string {
# my ($m, $d, $y) = ($_[0] =~ /(\w{3}) (\d+), (\d+)/);
# sprintf "%04d%02d%02d", $y, $m2n{lc $m}, $d;
# }

m2n = {
"jan" : 1, "feb" : 2, "mar" : 3,
"apr" : 4, "may" : 5, "jun" : 6,
"jul" : 7, "aug" : 8, "sep" : 9,
"oct" : 10, "nov" : 11, "dec" : 12
}

def chronologically(a,b):
return cmp(date_to_string(a), date_to_string(b))

def date_to_string(dt):
m, d, y = re.search("(\w{3}) (\d+), (\d+)", dt).groups()
return "%04d%02d%02d" % (int(y), m2n[m.lower()], int(d))

# { my %cache;
# sub chronologically {
# ($cache{$a} ||= date_to_string($a))
# cmp
# ($cache{$b} ||= date_to_string($b))
# }
# }
# In python we can't do the scope trick with cache
# but we ignore that for now
CACHE = {}
def chronologically(a, b):
if a not in CACHE:
CACHE[a] = date_to_string(a)
if b not in CACHE:
CACHE[b] = date_to_string(b)

return cmp(CACHE[a], CACHE[b])


# { my %cache;
# sub by_total_invested {
# ($cache{$a} ||= total_invested($a))
# <=>
# ($cache{$b} ||= total_invested($b))
# }
# }
CACHE = {}
def by_total_invested(a,b):
if not CACHE.get(a):
CACHE[a] = total_invested(a)
if not CACHE.get(b):
CACHE[b] = total_invested(b)

return cmp(CACHE[a], CACHE[b])


# { my %cache;
# sub by_total_invested {
# (exists $cache{$a} ? $cache{$a} : ($cache{$a} = total_invested($a)))
# <=>
# (exists $cache{$b} ? $cache{$b} : ($cache{$b} = total_invested($b)))
# }
# }
CACHE = {}
def by_total_invested(a,b):
if a not in CACHE:
CACHE[a] = total_invested(a)
if b not in CACHE:
CACHE[b] = total_invested(b)

return cmp(CACHE[a], CACHE[b])

# { my %cache;
# sub by_total_invested {
# ($cache{$a} ||= total_invested($a) || "0e0")
# <=>
# ($cache{$b} ||= total_invested($b) || "0e0")
# }
# }
# I can't think of a reason to try to emulate this trick
# (or how, frankly)

### 3.11 Evangelism

### 3.12 The Benefits of Speed

# sub function {
# if (++$CALLS == 100) { memoize 'function'}
# ...
# }
def function():
CALL += 1
if CALLS == 100:
function = memoize(function)
...

# use Time::HiRes 'time';
# my (%time, %calls);
# sub profile {
# my ($func, $name) = @_;
# my $stub = sub {
# my $start = time;
# my $return = $func->(@_);
# my $end = time;
# my $elapsed = $end - $start;
# $calls{$name} += 1;
# $time{$name} += $elapsed;
# return $return;
# };
# return $stub;
# }
# This is another good case for a decorator
import time
time_ = {}
calls = {}
def profile(func):
calls.setdefault(func.__name__, 0)
time_.setdefault(func.__name__, 0)
def stub(*args):
start = time.time()
return_ = func(*args)
end = time.time()
elapsed = end - start
calls[func.__name__] += 1
time_[func.__name__] += elapsed
return return_
return stub

@profile
def foo(x):
return x

# END {
# printf STDERR "%-12s %9s %6s\n", "Function", "# calls", "Elapsed";
# for my $name (sort {$time{$b} <=> $time{$a}} (keys %time)) {
# printf STDERR "%-12s %9d %6.2f\n", $name, $calls{$name}, $time{$name};
# }
# }
print >>sys.stderr, "%-12s %9s %6s\n" % ("Function", "# calls", "Elapsed")
for name, _time in sorted(time_.items(), key=lambda x: x[1]):
print >>sys.stderr, "%-12s %9d %6.2f\n" % (name, calls[name], time_[name])

No comments: