# NOTE(review): the following text preceded the module on the same physical
# line of the flattened archive (tar member headers and the Changes file).
# It is kept verbatim, as comments, so that no content is lost.
#
# Text-Context-EitherSide-1.4/0000700000076500000240000000000011177541211014611 5ustar tonystaff
# Text-Context-EitherSide-1.4/Changes0000644000076500000240000000101411177540733016122 0ustar tonystaff
# Revision history for Perl extension Text::Context::EitherSide.
# 1.4 Mon May 4 13:22:08 EEST 2009 - Relicense as AL2.0
# 1.3 Tue Sep 27 10:29:25 UTC 2005 - Small doc fixes
# 1.2 Fri Oct 22 16:45:14 UTC 2004 - Now maintained by Tony Bowden - Small doc fixes
# 1.1 Tue Apr 1 19:47:10 BST 2003 - Support finding "Wadler" in "Walder's". - Case insensitive matching.
# 0.01 Thu Dec 5 12:47:48 2002 - original version; created by h2xs 1.22 with options -b 5.6.0 -AX -n Text::Context::EitherSide
# Text-Context-EitherSide-1.4/lib/0000700000076500000240000000000011177541211015357 5ustar tonystaff
# Text-Context-EitherSide-1.4/lib/Text/0000700000076500000240000000000011177541211016303 5ustar tonystaff
# Text-Context-EitherSide-1.4/lib/Text/Context/0000700000076500000240000000000011177541211017727 5ustar tonystaff
# Text-Context-EitherSide-1.4/lib/Text/Context/EitherSide.pm0000644000076500000240000001327411177541074022342 0ustar tonystaff

package Text::Context::EitherSide;

use 5.006;
use strict;
use warnings;

use Carp;

our $VERSION = '1.4';

use base 'Exporter';
our @EXPORT_OK = qw(get_context);

# Number of context words kept on each side when the caller gives none.
use constant DEFAULT_WORDS => 2;

# get_context($n, $string, @words)
#
# Functional shortcut: builds a throw-away object for $string with a context
# width of $n and returns the as_string(@words) result.
sub get_context {
    my ($n, $string, @words) = @_;
    return Text::Context::EitherSide->new($string, context => $n)
        ->as_string(@words);
}

# new($text, context => $n)
#
# Stores the text to search and the context width (DEFAULT_WORDS when no
# "context" key is passed).  Missing or empty text produces a warning via
# carp and is treated as the empty string, so later splitting never sees
# undef.
sub new {
    my ($class, $text, %args) = @_;

    # Test definedness/emptiness rather than truth: the text "0" is a
    # perfectly valid one-word document and must not trigger the warning.
    if (!defined $text or $text eq '') {
        carp "No text supplied for context search";
        $text = '';
    }

    return bless {
        n    => exists $args{context} ? $args{context} : DEFAULT_WORDS,
        text => $text,
    }, $class;
}

# context([$n])
#
# Combined getter/setter for the context width.  Always returns the current
# (possibly just updated) value.
sub context {
    my $self = shift;
    $self->{n} = shift if @_;
    return $self->{n};
}

# as_sparse_list(@keywords)
#
# Splits the stored text on whitespace and returns a list of the same length
# in which every word within the context width of a keyword hit keeps its
# original spelling and every other position is undef.  Returns the empty
# list when no keyword occurs at all.
#
# Matching is case-insensitive and works on the \W-separated pieces of each
# word, so the keyword "walder" is found in "Walder's".  Each keyword
# argument may itself contain several whitespace-separated keywords.
sub as_sparse_list {
    my ($self, @words) = @_;

    # split ' ' (awk mode) skips leading whitespace.  split /\s+/ would turn
    # " fox" into ("", "fox") and register the empty string as a keyword.
    my %is_keyword = map { lc $_ => 1 } map { split ' ', $_ } @words;

    # Same reasoning for the text: leading whitespace must not create a
    # phantom empty first word that later shows up as a bogus ellipsis.
    my @tokens = split ' ', $self->{text};
    my $width  = $self->{n};

    my @marks = (undef) x @tokens;
    my $hits  = 0;

    TOKEN:
    for my $i (0 .. $#tokens) {

        # A token starting with punctuation ("'tis") yields an empty leading
        # field from split /\W+/; drop it so it can never count as a match.
        my @subwords = grep { length } split /\W+/, lc $tokens[$i];
        next TOKEN unless grep { exists $is_keyword{$_} } @subwords;

        $hits++;

        # Keep the hit and its neighbours, clamped to the ends of the text.
        my $from = $i - $width;
        my $to   = $i + $width;
        $from = 0        if $from < 0;
        $to   = $#tokens if $to > $#tokens;
        @marks[$from .. $to] = @tokens[$from .. $to];
    }

    return $hits ? @marks : ();
}

# as_list(@keywords)
#
# Like as_sparse_list, but every run of one or more undefs is collapsed into
# a single "..." element.  Returns nothing when no keyword was found.
sub as_list {
    my $self   = shift;
    my @sparse = $self->as_sparse_list(@_);
    return unless @sparse;

    my @ret;

    # Track gaps with a flag instead of comparing against the previous
    # output element: a literal "..." word in the text must not swallow the
    # ellipsis of the gap that follows it.
    my $in_gap = 0;
    for my $word (@sparse) {
        if (defined $word) {
            push @ret, $word;
            $in_gap = 0;
        }
        elsif (!$in_gap) {
            push @ret, "...";
            $in_gap = 1;
        }
    }
    return @ret;
}

# as_string(@keywords)
#
# The as_list result joined with single spaces (empty string when nothing
# matched).
sub as_string {
    my $self = shift;
    return join " ", $self->as_list(@_);
}

1;

__END__

=head1 NAME

Text::Context::EitherSide - Get n words either side of search keywords

=head1 SYNOPSIS

  use Text::Context::EitherSide;

  my $text = "The quick brown fox jumped over the lazy dog";
  my $context = Text::Context::EitherSide->new($text);

  $context->as_string("fox") # "... quick brown fox jumped over ..."

  $context->as_string("fox", "jumped")
    # "... quick brown fox jumped over the ..."

  my $context = Text::Context::EitherSide->new($text, context => 1);
    # 1 word on either side

  $context->as_string("fox", "jumped", "dog");
    # "... brown fox jumped over ... lazy dog",

Or, if you don't believe in all this OO rubbish:

  use Text::Context::EitherSide qw(get_context);
  get_context(1, $text, "fox", "jumped", "dog")
    # "... brown fox jumped over ... lazy dog"

=head1 DESCRIPTION

Suppose you have a large piece of text - typically, say, a web page or a
mail message. And now suppose you've done some kind of full-text search
on that text for a bunch of keywords, and you want to display the
context in which you found the keywords inside the body of the text.

A simple-minded way to do that would be just to get the two words either
side of each keyword. But hey, don't be too simple minded, because
you've got to make sure that the list doesn't overlap.
If you have the quick brown fox jumped over the lazy dog and you extract two words either side of "fox", "jumped" and "dog", you really don't want to end up with quick brown fox jumped over brown fox jumped over the the lazy dog so you need a small amount of smarts. This module has a small amount of smarts. =head1 EXPORTABLE =head2 get_context This is primarily an object-oriented module. If you don't care about that, just import the C<get_context> subroutine, and call it like so: get_context($num_of_words, $text, @words_to_find) and you'll get back a string with ellipses as in the synopsis. That's all that most people need to know. But if you want to do clever stuff... =head1 METHODS =head2 new my $c = Text::Context::EitherSide->new($text [, context=> $n]); Create a new object storing some text to be searched, plus optionally some information about how many words on either side you want. (If you don't like the default of 2.) =head2 context $c->context(5); Allows you to get and set the number of the words on either side. =head2 as_sparse_list $c->as_sparse_list(@keywords) Returns the keywords, plus I<n> words on either side, as a sparse list; the original text is split into an array of words, and non-contextual elements are replaced with C<undef>s. (That's not actually how it works, but conceptually, it's the same.) =head2 as_list $c->as_list(@keywords) The same as C<as_sparse_list>, but single or multiple C<undef>s are collapsed into a single ellipsis: (undef, "foo", undef, undef, undef, "bar") becomes ("...", "foo", "...", "bar") =head2 as_string $c->as_string(@keywords) Takes the C<as_list> output above and joins them all together into a string. This is what most people want from C<Text::Context::EitherSide>. =head2 EXPORT C<get_context> is available as a shortcut for Text::Context::EitherSide->new($text, context => $n)->as_string(@words); but needs to be explicitly imported. Nothing is exported by default. =head1 SEE ALSO L<Text::Context> is an even smarter way of extracting a contextual string.
=head1 AUTHOR Current maintainer: Tony Bowden Original author: Simon Cozens =head1 BUGS and QUERIES Please direct all correspondence regarding this module to: bug-Text-Context-EitherSide@rt.cpan.org =head1 COPYRIGHT AND LICENSE Copyright 2002-2005 by Kasei Limited, http://www.kasei.com/ You may use and redistribute this module under the terms of the Artistic License 2.0. http://www.perlfoundation.org/artistic_license_2_0 =cut Text-Context-EitherSide-1.4/Makefile.PL0000644000076500000240000000043710136234561016602 0ustar tonystaffuse 5.006; use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'Text::Context::EitherSide', 'VERSION_FROM' => 'lib/Text/Context/EitherSide.pm', 'ABSTRACT_FROM' => 'lib/Text/Context/EitherSide.pm', 'AUTHOR' => 'Tony Bowden ', 'PREREQ_PM' => {}, ); Text-Context-EitherSide-1.4/MANIFEST0000644000076500000240000000024110316217746015760 0ustar tonystaffChanges lib/Text/Context/EitherSide.pm Makefile.PL MANIFEST MANIFEST.SKIP META.yml Module meta-data (added by MakeMaker) README t/1.t t/pod-coverage.t t/pod.t Text-Context-EitherSide-1.4/MANIFEST.SKIP0000444000076500000240000000054510316217473016527 0ustar tonystaff# Avoid version control files. \bRCS\b \bCVS\b ,v$ ,B$ ,D$ \B\.svn\b aegis.log$ \bconfig$ \bbuild$ # Avoid Makemaker generated and utility files. \bMakefile$ \bblib \bMakeMaker-\d \bpm_to_blib$ \bblibdirs$ # Avoid Module::Build generated and utility files. \bBuild$ \b_build # Avoid temp and backup files. 
~$ \.gz$ \.old$ \.bak$ \.swp$ \.tdy$ \#$ \b\.# Text-Context-EitherSide-1.4/META.yml0000600000076500000240000000064511177541211016071 0ustar tonystaff--- #YAML:1.0 name: Text-Context-EitherSide version: 1.4 abstract: Get n words either side of search keywords license: ~ author: - Tony Bowden generated_by: ExtUtils::MakeMaker version 6.44 distribution_type: module requires: meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.3.html version: 1.3 Text-Context-EitherSide-1.4/README0000644000076500000240000000762311177541077015525 0ustar tonystaffNAME Text::Context::EitherSide - Get n words either side of search keywords SYNOPSIS use Text::Context::EitherSide; my $text = "The quick brown fox jumped over the lazy dog"; my $context = Text::Context::EitherSide->new($text); $context->as_string("fox") # "... quick brown fox jumped over ..." $context->as_string("fox", "jumped") # "... quick brown fox jumped over the ..." my $context = Text::Context::EitherSide->new($text, context => 1); # 1 word on either side $context->as_string("fox", "jumped", "dog"); # "... brown fox jumped over ... lazy dog", Or, if you don't believe in all this OO rubbish: use Text::Context::EitherSide qw(get_context); get_context(1, $text, "fox", "jumped", "dog") # "... brown fox jumped over ... lazy dog" DESCRIPTION Suppose you have a large piece of text - typically, say, a web page or a mail message. And now suppose you've done some kind of full-text search on that text for a bunch of keywords, and you want to display the context in which you found the keywords inside the body of the text. A simple-minded way to do that would be just to get the two words either side of each keyword. But hey, don't be too simple minded, because you've got to make sure that the list doesn't overlap. 
If you have the quick brown fox jumped over the lazy dog and you extract two words either side of "fox", "jumped" and "dog", you really don't want to end up with quick brown fox jumped over brown fox jumped over the the lazy dog so you need a small amount of smarts. This module has a small amount of smarts. EXPORTABLE get_context This is primarily an object-oriented module. If you don't care about that, just import the "get_context" subroutine, and call it like so: get_context($num_of_words, $text, @words_to_find) and you'll get back a string with ellipses as in the synopsis. That's all that most people need to know. But if you want to do clever stuff... METHODS new my $c = Text::Context::EitherSide->new($text [, context=> $n]); Create a new object storing some text to be searched, plus optionally some information about how many words on either side you want. (If you don't like the default of 2.) context $c->context(5); Allows you to get and set the number of the words on either side. as_sparse_list $c->as_sparse_list(@keywords) Returns the keywords, plus *n* words on either side, as a sparse list; the original text is split into an array of words, and non-contextual elements are replaced with "undef"s. (That's not actually how it works, but conceptually, it's the same.) as_list $c->as_list(@keywords) The same as "as_sparse_list", but single or multiple "undef"s are collapsed into a single ellipsis: (undef, "foo", undef, undef, undef, "bar") becomes ("...", "foo", "...", "bar") as_string $c->as_string(@keywords) Takes the "as_list" output above and joins them all together into a string. This is what most people want from "Text::Context::EitherSide". EXPORT "get_context" is available as a shortcut for Text::Context::EitherSide->new($text, context => $n)->as_string(@words); but needs to be explicitly imported. Nothing is exported by default. SEE ALSO Text::Context is an even smarter way of extracting a contextual string.
AUTHOR Current maintainer: Tony Bowden Original author: Simon Cozens BUGS and QUERIES Please direct all correspondence regarding this module to: bug-Text-Context-EitherSide@rt.cpan.org COPYRIGHT AND LICENSE Copyright 2002-2005 by Kasei Limited, http://www.kasei.com/ You may use and redistribute this module under the terms of the Artistic License 2.0. http://www.perlfoundation.org/artistic_license_2_0 Text-Context-EitherSide-1.4/t/0000700000076500000240000000000011177541211015054 5ustar tonystaffText-Context-EitherSide-1.4/t/1.t0000644000076500000240000000375607573656771015455 0ustar tonystaff# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 1.t' ######################### # change 'tests => 1' to 'tests => last_test_to_print'; use Test::More tests => 10; use_ok("Text::Context::EitherSide"); Text::Context::EitherSide->import("get_context"); ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. my $text = "The quick brown fox jumped over the lazy dog"; is( get_context(2, $text, "fox"), "... quick brown fox jumped over ...", "one word, context 2" ); is( get_context(2, $text, "fox", "jumped"), "... quick brown fox jumped over the ...", "adjacent words, context 2" ); is( get_context(2, $text, "fox", "jumped", "dog"), "... quick brown fox jumped over the lazy dog", "adjacent and distinct words, (including one at the end) context 2" ); is( get_context(1, $text, "fox", "jumped", "dog"), "... brown fox jumped over ... lazy dog", "adjacent and distinct words, (including one at the end) context 1" ); is( get_context(1, $text, "fox jumped dog"), "... brown fox jumped over ... lazy dog", "arguments get_context split correctly" ); is( get_context(1, "Test > X foo && bar | z", "X", "bar"), "... 
> X foo && bar | ...", "non-words act like words" ); is( get_context(2, "wobble wobble wobble wobble wobble wobble wobble", "wobble"), "wobble wobble wobble wobble wobble wobble wobble", "repeated words are caught multiple times" ); is(get_context(0, "bother blast damned", "the", "last"), '', "only whole words match, not partial words"); is( get_context(0, $text, "fox", "dog"), "... fox ... dog", "Context 0 (How very silly)" ); Text-Context-EitherSide-1.4/t/pod-coverage.t0000644000076500000240000000024110316217376017631 0ustar tonystaffuse Test::More; eval "use Test::Pod::Coverage 1.00"; plan skip_all => "Test::Pod::Coverage 1.00 required for testing POD coverage" if $@; all_pod_coverage_ok(); Text-Context-EitherSide-1.4/t/pod.t0000644000076500000240000000020110316217376016034 0ustar tonystaffuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok();