MEAD
Extract
Privates (from my definitions) |
%read_extract_sents; |
MEAD::Extract opens and has methods to access various parts of a MEAD-style extract. Since the extract format differs between DUC-sytle extracts and MEAD-style extracts, we have a two classes.
|
_really_open_me | No description | Code |
by_did_and_sno | No description | Code |
extract_to_summary | No description | Code |
get_DID_for_sentence | No description | Code |
get_SNO_for_sentence | No description | Code |
get_num_sentences | No description | Code |
open_from_file | No description | Code |
read_extract | No description | Code |
read_extract_handle_start | No description | Code |
sentref_array_to_extract | No description | Code |
write_extract | No description | Code |
Methods description
Methods code
_really_open_me | description | top | prev | next |
sub _really_open_me
{ my ($self, $filename) = @_;
my $extract_tree = XML::TreeBuilder->new;
$extract_tree->parsefile($filename);
my $extract_node = $extract_tree->find_by_tag_name("EXTRACT");
my $node;
my $i = 0;
while ($node = $extract_node->look_down("_tag", "S", "ORDER", ++$i)) {
my $DID = $node->attr("DID");
my $SNO = $node->attr("SNO");
my $ORDER = $node->attr("ORDER");
die unless ($DID and $SNO);
$self->{DID_list}[$ORDER] = $DID;
$self->{SNO_list}[$ORDER] = $SNO;
}
##cleanup.
$extract_tree->delete;
}
sub by_did_and_sno
{
my $comp = $$a{'DID'} cmp $$b{'DID'};
return $comp if $comp != 0;
return $$a{'SNO'} <=> $$b{'SNO'};
}
sub extract_to_summary
{ my $extract = shift;
my $cluster = shift;
my $summary = {};
foreach my $order (keys %{$extract}) {
my $sentref = $$extract{$order};
my $did = $$sentref{'DID'};
my $sno = $$sentref{'SNO'};
my $cluster_docref = $$cluster{$did};
my $cluster_sentref = $$cluster_docref[$sno];
my $summary_sentref = {};
$$summary_sentref{'DID'} = $did;
$$summary_sentref{'SNO'} = $sno;
$$summary_sentref{'TEXT'} = $$cluster_sentref{'TEXT'};
$$summary{$order} = $summary_sentref;
}
return $summary;
}
sub get_DID_for_sentence
{ my ($self, $sentence) = @_;
return $self->{DID_list}[$sentence];
}
sub get_SNO_for_sentence
{ my ($self, $sentence) = @_;
return $self->{SNO_list}[$sentence];
}
sub get_num_sentences
{ my $self = shift;
return scalar @{$self->{DID_list}} - 1;
}
sub open_from_file
{ my ($class, $filename) = @_;
my $self = {};
bless $self, $class;
$self->_really_open_me($filename);
return $self;
}
sub read_extract
{ my $extract_arg = shift;
%read_extract_sents = ();
### Begin parsing the extract
my $extract_parser = new XML::Parser(Handlers => {
'Start' =>\& read_extract_handle_start});
if (ref $extract_arg) {
$extract_parser->parse($extract_arg);
} else {
$extract_parser->parsefile($extract_arg);
}
return\% read_extract_sents;
}
sub read_extract_handle_start
{ shift;
my $element_name = shift;
my %atts = @_;
if ($element_name eq 'S') {
my $num = $atts{'ORDER'};
my $did = $atts{'DID'};
my $sno = $atts{'SNO'};
my $sentref = {};
$$sentref{'DID'} = $did;
$$sentref{'SNO'} = $sno;
$read_extract_sents{$num} = $sentref;
}
}
sub sentref_array_to_extract
{ my $arrayref = shift;
my $hashref = {};
my $order = 0;
foreach my $sentref (sort by_did_and_sno @{$arrayref}) {
my $new_sentref = {};
$$new_sentref{'DID'} = $$sentref{'DID'};
$$new_sentref{'SNO'} = $$sentref{'SNO'};
$order++;
$$hashref{$order} = $new_sentref;
}
return $hashref;
}
write_extract | description | top | prev | next |
sub write_extract
{ my $extract = shift;
my %args = @_;
if (ref($extract) eq "ARRAY") {
$extract = sentref_array_to_extract($extract);
}
my $cluster_name = $args{'QID'};
my $lang = $args{'LANG'};
my $compression_percent = $args{'COMPRESSION'};
my $system = $args{'SYSTEM'};
my $run = $args{'RUN'};
my $output = $args{'OUTPUT'} ||\* STDOUT;
unless (ref $output) {
open TEMP, ">$output" or
die "Unable to open '$output' for printing extract.\n";
$output =\* TEMP;
}
my $writer = new XML::Writer(DATA_MODE => 1, OUTPUT => $output);
$writer->xmlDecl();
$writer->doctype("EXTRACT", "", "/clair/tools/mead/dtd/extract.dtd");
$writer->startTag("EXTRACT",
"QID" => $cluster_name,
"LANG" => $lang,
"COMPRESSION" => $compression_percent,
"SYSTEM" => $system,
"RUN" => $run);
foreach my $order (sort { $a <=> $b } keys %{ $extract }) {
my $sentref = $$extract{$order};
$writer->emptyTag("S",
"ORDER" => $order,
"DID" => $$sentref{'DID'},
"SNO" => $$sentref{'SNO'});
}
$writer->endTag();
$writer->end();
}
General documentation
No general documentation available.