DUC
Extract
No package variables defined. |
This class opens extracts, both from files and from strings, so that the user will be able to get extract information quickly and easily.
MEAD::Extract should provide much the same functionality as this class.
|
_really_open_me | No description | Code |
get_DID_for_sentence | No description | Code |
get_DOCID_for_sentence | No description | Code |
get_SNO_for_sentence | No description | Code |
get_WCNT_for_sentence | No description | Code |
get_num_sentences | No description | Code |
get_text | No description | Code |
open_from_file | No description | Code |
parse_from_string | No description | Code |
Methods description
Methods code
_really_open_me | description | top | prev | next |
## Parse an extract XML string and record per-sentence metadata on $self.
##
## Arguments:
##   $extract_string - the complete extract document as XML text.
##
## Effects on $self:
##   extract_string - the raw XML string, stored unchanged.
##   DID_list, SNO_list, WCNT_list - per-sentence document ID, sentence
##       number and word count, indexed by 1-based position of the <s>
##       node in the extract.  Slot 0 is an unused placeholder.
##
## Dies if parsing fails, if no 'multi-e' element is found, or if a sentence
## node lacks a document ID ('docref' or 'docid') or a 'num' attribute.
sub _really_open_me {
    my ($self, $extract_string) = @_;

    $self->{extract_string} = $extract_string;

    ## Start from fresh lists so that parsing into the same object twice
    ## never leaves stale entries behind.  Slot 0 is filled with a
    ## placeholder so the lists are well-formed even for an extract that
    ## contains no sentences at all.
    $self->{DID_list}  = [undef];
    $self->{SNO_list}  = [undef];
    $self->{WCNT_list} = [undef];

    my $extract_tree = XML::TreeBuilder->new;

    ## Do the real work inside eval so the tree is always deleted below,
    ## even when parsing or validation dies part-way through.
    my $ok = eval {
        $extract_tree->parse($extract_string);

        my $extract_node = $extract_tree->find_by_tag_name("multi-e");
        unless ($extract_node) {
            die "Couldn't find a 'multi-e' element in the extract";
        }

        my $i = 0;
        foreach my $node ($extract_node->look_down("_tag", "s")) {
            $i++;

            ## document ID's can be called either of these names...
            my $DID  = $node->attr("docref") || $node->attr("docid");
            my $SNO  = $node->attr("num");
            my $WCNT = $node->attr("wdcount");

            ## TODO: AJW 8/28
            ## get the wordcount somehow, possibly splitting the words...
            ## Note that a missing 'wdcount' is tolerated; only the document
            ## ID and sentence number are mandatory.  The test is by
            ## truthiness, so a value of "0" counts as missing.
            unless ($DID && $SNO) {
                die "Couldn't find (at least one of) 'docid' or 'num' " .
                    "in the $i-th sentence node";
            }

            $self->{DID_list}[$i]  = $DID;
            $self->{SNO_list}[$i]  = $SNO;
            $self->{WCNT_list}[$i] = $WCNT;
        }
        1;
    };
    ## Capture the error before cleanup can overwrite $@.
    my $error = $@;

    ##cleanup.
    $extract_tree->delete;

    die $error unless $ok;
    return;
}
## Alias for get_DOCID_for_sentence: forwards the sentence index to that
## method and returns whatever it returns.
sub get_DID_for_sentence {
    my $self = shift;
    my ($sentence) = @_;

    return $self->get_DOCID_for_sentence($sentence);
}
## Return the entry stored in this object's DID_list at index $sentence
## (undef when nothing is stored there).
sub get_DOCID_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{DID_list}->[$sentence];
}
## Return the entry stored in this object's SNO_list at index $sentence
## (undef when nothing is stored there).
sub get_SNO_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{SNO_list}->[$sentence];
}
## Return the entry stored in this object's WCNT_list at index $sentence
## (undef when nothing is stored there).
sub get_WCNT_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{WCNT_list}->[$sentence];
}
## Return the number of sentences recorded in this extract.
##
## The count is derived from DID_list, whose slot 0 is never used for a
## sentence (entries are stored starting at index 1), hence the "- 1".
## An object with no DID_list, or with an empty one, reports 0 sentences
## instead of dereferencing undef or returning -1.
sub get_num_sentences {
    my $self = shift;

    my $doc_ids = $self->{DID_list};
    return 0 unless $doc_ids && @{$doc_ids};

    return scalar(@{$doc_ids}) - 1;
}
## Return the extract's text with markup removed.
##
## The result is computed from the stored extract_string on first use and
## cached in $self->{text}: everything between '<' and the next '>' is
## deleted, then each run of newlines becomes a single space.  Returns
## undef when no extract string has been stored.
sub get_text {
    my $self = shift;

    unless (defined $self->{text}) {
        my $text = $self->{extract_string};

        if (defined $text) {
            ## /s lets '.' cross line breaks, so a tag whose attributes are
            ## wrapped over several lines is stripped as well.
            $text =~ s/\<.+?\>//gs;
            $text =~ s/\n+/ /g;
        }

        $self->{text} = $text;
    }

    return $self->{text};
}
## Constructor: build a new object of class $class from an extract file.
##
## Arguments:
##   $class    - the class to bless the new object into.
##   $filename - path of the extract file to read.
##
## Reads the whole file, hands its contents to _really_open_me, and returns
## the new object.  Dies if the file cannot be opened.
sub open_from_file {
    my ($class, $filename) = @_;

    my $self = bless {}, $class;

    ## Three-argument open with a lexical handle: the filename is never
    ## interpreted as a mode or a command, and a failure is reported here
    ## rather than surfacing later as a confusing parse error.
    open(my $extract_fh, '<', $filename)
        or die "Couldn't open extract file '$filename': $!";

    ## Read the whole file in one go.
    my $extract_string = do { local $/; <$extract_fh> };
    close $extract_fh;

    $self->_really_open_me($extract_string);
    return $self;
}
parse_from_string | description | top | prev | next |
## Constructor: build a new object of class $class from an extract that is
## already held in memory as a string.  The string is handed to
## _really_open_me and the new object is returned.
sub parse_from_string {
    my $class          = shift;
    my $extract_string = shift;

    my $extract = bless {}, $class;
    $extract->_really_open_me($extract_string);

    return $extract;
}
General documentation
No general documentation available.