#! /usr/local/bin/perl -w use strict; # (C) 2007, Benoit Hudson # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # Create a graph of a set of proofs, from LaTeX sources. Output is in the # graphviz 'dot' format. # The following will generate a pdf of your proof: # parse-proof foo.tex | dot -Tpdf -o foo.pdf # Upon a \label in a theorem-like environment (what counts as theorem-like is # mentioned below), stores the name of the theorem. In the subsequent proof # environment, any \ref will create an edge to the theorem from the reference. # You can use %\ref or \comment{\ref{...}} to avoid having the ref appear in # the text. # TODO: begin and end statements for all environments must be at the beginning # of the line, or weird things may happen. This is because I'm lazy. # TODO: I completely ignore comments. Commented-out sections of proofs will # generate edges just as surely as anything else. # # Theorem-like environments are labeled below in %nickname. The name of the # label must also match the environment: in a theorem, use \label{thm:foo}; in # a lemma, lem:, and so on. All other labels are ignored. I assume that # statements don't nest (i.e. we don't state a lemma while stating a theorem). # # In the output graph, definitions (def) are boxes, theorems (thm) are # diamonds, everything else is the default. # Generalize over kinds of statements. We ignore labels, refs, and # environments not in this list. my %environment = ( 'thm' => 'theorem', 'fact' => 'fact', 'def' => 'definition', 'lem' => 'lemma' ); my %nickname; for my $key (keys %environment) { $nickname{$environment{$key}} = $key; } # Parsing information. my $filename; my $theorem_name; my $theorem_type; my $theorem_long_name; # The final output. my %edges; # keyed by \ref, points to list of theorem_name my %nodes; # keyed by theorem_name, points to the optional long name or undef. my @states = ('NONE', 'INTHM', 'HAVE_THM', 'INPROOF'); my $state = 'NONE'; sub warning { print STDERR ("$filename:$.: warning: ", @_, "\n"); } sub warn_unnamed { warning "unnamed $environment{$theorem_type}"; } sub warn_unproved { warning ("unproved $environment{$theorem_type} ", getName()); } sub getName { if (!defined $theorem_name) { warn_unnamed(); $theorem_name = "$theorem_type:$filename:$."; } return $theorem_name; } sub makeNode { die unless defined $theorem_type; $nodes{getName()} = $theorem_long_name; } sub parse { $filename = shift @_; open(FILE, $filename) or do { warn "Can't open $filename: $!\n"; return; }; $. = 0; while() { chomp; if (/^\\begin{([^}]*)}/) { my $env = $1; if (exists $nickname{$env}) { $theorem_type = $nickname{$env}; if (/\[([^\]]*)\]/) { $theorem_long_name = $1; $theorem_long_name =~ s/\\/\\\\/; } else { $theorem_long_name = undef; } $state = 'INTHM'; } elsif ($env eq 'proof') { unless ($state eq 'HAVE_THM') { warning "proof without theorem"; next; } makeNode(); $state = 'INPROOF'; } } elsif (/^\\end{([^}]*)}/) { my $env = $1; if ($env eq 'proof') { $theorem_type = undef; $theorem_name = undef; $state = 'NONE'; } elsif ($state eq 'INTHM' && $env eq $environment{$theorem_type}) { $state = 'HAVE_THM'; } } elsif (/\\label{([^}]*)}/) { my $label = $1; if ($state eq 'INTHM' && $label =~ /^$theorem_type/) { $theorem_name = $label; makeNode(); } # TODO warning "multiple labels" if /\\label.*\\label/; } elsif ($state ne 'NONE' && /\\ref{([^\}]*)}/) { my $ref = $1; if (exists $edges{$ref}) { push(@{$edges{$ref}}, getName()); } else { $edges{$ref} = [getName()]; } warning "multiple refs" if /\\ref.*\\ref/; } } } ################## ## output # Print one node. Does not print nodes already printed. # Adds options in [] if there are any to add. Particularly, this is how I set # the shape of def and thm nodes, and the label in general. my %printed_nodes; sub printNode { my $node = shift @_; return if $printed_nodes{$node}++; print " \"$node\""; # collect a number of options to print with the node. my @options; # print the long name, if any, as the label to use for the node if (defined $nodes{$node}) { push(@options, "label=\"$nodes{$node}\""); } # set the shape according to type if ($node =~ /^thm:/) { push(@options, "shape=diamond"); } elsif ($node =~ /^def:/) { push(@options, "shape=box"); } # print the options print ' [', join(' ', @options), ']' if @options; print ";\n"; } # print the nodes, and then the edges sub output { print "digraph proof {\n"; # collect all nodes, whether we saw them defined or not my @nodes = keys %nodes; for my $from (keys %edges) { push(@nodes, $from); for my $to (@{$edges{$from}}) { push(@nodes, $to); } } for my $node (sort @nodes) { printNode($node); } for my $from (sort keys %edges) { for my $to (@{$edges{$from}}) { print " \"$from\" -> \"$to\";\n"; } } print "}\n"; } ########################################################################### ## Mainline! for (@ARGV) { parse $_; } output();