Text-Capitalize-1.5000755001750001750 013543270756 13461 5ustar00doomdoom000000000000Text-Capitalize-1.5/Build.PL000444001750001750 162713543270756 15120 0ustar00doomdoom000000000000
use strict;
use warnings;
use Module::Build;

# Build script for the Text-Capitalize distribution: describes the
# distribution to Module::Build and writes the ./Build script.
my $builder = Module::Build->new(
    module_name       => 'Text::Capitalize',
    license           => 'perl',
    dist_author       => q{Joseph Brenner <doom@kzsu.stanford.edu>},
    dist_version_from => 'lib/Text/Capitalize.pm',

    # Needed only to build the distribution and run its test suite.
    build_requires => {
        'Test::More'   => 0,
        'FindBin'      => 1.04,
        'lib'          => 0,
        'Env'          => 1.00,
        'Data::Dumper' => 0,
    },

    # Needed at run time by lib/Text/Capitalize.pm.  This list used to be
    # passed under the key "build", which Module::Build does not treat as
    # a prerequisite list, so the generated metadata carried no runtime
    # prerequisites at all.
    requires => {
        'perl'     => '5.006',    # the module itself says "use 5.006"
        'strict'   => 0,
        'warnings' => 0,
        'utf8'     => 0,
        'Carp'     => 0,
        'Exporter' => 0,
        'vars'     => 0,
    },

    add_to_cleanup     => ['Text-Capitalize-*'],
    create_makefile_pl => 'traditional',
);

$builder->create_build_script();
Text-Capitalize-1.5/Changes000444001750001750 403413543270756 15112 0ustar00doomdoom000000000000
Revision history for Text-Capitalize

0.1   original version; created by Stanislaw Y. Puspep.
      this included only the capitalize function

0.2   Fri Aug  1 10:19:49 2003
      - created a new framework using h2xs 1.22 with options
        -AX -n Text::Capitalize
      capitalize_title function added by Joseph Brenner

0.3   Changed stucture of driving loop for capitalize_title,
      fixed a few bugs and (hopefully) improved readability.
      Added "special effects" functions:
      scramble_case, random_case, zippify_case

0.4   Thu Sep 18 17:50:13 2003, Thu Feb 17, 2005  1:13 PM
      Cleaned up documentation slightly, belatedly added the above
      remark "0.3", which describes the version 0.3 release.
      Fixed bad bug: all routines now use localized $_, to keep
      from clobbering $_ in calling code.
      Added tests for random_case and scramble_case.

0.5   Tue Jan 31, 2006  12:24 AM
      Bug fix release for the test suite: if the current locale
      can't deal with the international characters use in some
      of the test cases, those particular test cases will be skipped.

0.6-0.8  Tuesday March 17, 2009  10:32 AM
      All bug fix releases for the tests, redesigning them to make
      them insensitive to the set-up of locales.
0.9 Wed Jan 6 19:10:10 2010 Revised the documentation. Switched to Module::Build. Refactored some routines. 1.0 Fixed "use lib" lines in *.t files (new layout from module_starter since 0.9). Added "use utf8" to all files, and deleted "use locale". Made sure all files were saved with utf8 encoding (many were latin-1 before). Now requiring at least perl 5.6 (previously 5.4). 1.1 Improved speed of capitalize_title routine. 1.2 Improved portability to perl 5.6. 1.3 Minor cleanup of *.t files 1.4 Changed uc calls to ucfirst (hypothetically better for title-casing with unicode strings) 1.5 Removed PerlIO::locale dependency from tests, got rid of the Andy Lester tests of pod and such. Text-Capitalize-1.5/MANIFEST000444001750001750 71413543270756 14731 0ustar00doomdoom000000000000Build.PL Changes MANIFEST README lib/Text/Capitalize.pm t/001-load.t t/002-captitle-default.t t/003-captitle-preserve_whitespace.t t/004-captitle-preserve_allcaps.t t/005-captitle-preserve_anycaps.t t/006-captitle-preserve_allcaps_and_whitespace.t t/007-captitle-preserve_anycaps_and_whitespace.t t/008-random_case.t t/009-scramble_case.t t/lib/Test/Locale/Utils.pm t/lib/Test/Locale/t/000-all_true-Test-Locale-Utils-private.t Makefile.PL META.yml META.json Text-Capitalize-1.5/META.json000444001750001750 210613543270756 15236 0ustar00doomdoom000000000000{ "abstract" : "capitalize strings (\"to WORK AS titles\" becomes \"To Work as Titles\")", "author" : [ "Joseph Brenner " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.4229", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Text-Capitalize", "prereqs" : { "build" : { "requires" : { "Data::Dumper" : "0", "Env" : "1", "FindBin" : "1.04", "Test::More" : "0", "lib" : "0" } }, "configure" : { "requires" : { "Module::Build" : "0.42" } } }, "provides" : { "Text::Capitalize" : { "file" : "lib/Text/Capitalize.pm", "version" : "1.5" } }, "release_status" : 
"stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ] }, "version" : "1.5", "x_serialization_backend" : "JSON::PP version 2.97001" } Text-Capitalize-1.5/META.yml000444001750001750 131213543270756 15064 0ustar00doomdoom000000000000--- abstract: 'capitalize strings ("to WORK AS titles" becomes "To Work as Titles")' author: - 'Joseph Brenner ' build_requires: Data::Dumper: '0' Env: '1' FindBin: '1.04' Test::More: '0' lib: '0' configure_requires: Module::Build: '0.42' dynamic_config: 1 generated_by: 'Module::Build version 0.4229, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Text-Capitalize provides: Text::Capitalize: file: lib/Text/Capitalize.pm version: '1.5' resources: license: http://dev.perl.org/licenses/ version: '1.5' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' Text-Capitalize-1.5/Makefile.PL000444001750001750 74513543270756 15556 0ustar00doomdoom000000000000# Note: this file was auto-generated by Module::Build::Compat version 0.4229 use ExtUtils::MakeMaker; WriteMakefile ( 'NAME' => 'Text::Capitalize', 'VERSION_FROM' => 'lib/Text/Capitalize.pm', 'PREREQ_PM' => { 'Data::Dumper' => 0, 'Env' => 1, 'FindBin' => '1.04', 'Test::More' => 0, 'lib' => 0 }, 'INSTALLDIRS' => 'site', 'EXE_FILES' => [], 'PL_FILES' => {} ) ; Text-Capitalize-1.5/README000444001750001750 347013543270756 14502 0ustar00doomdoom000000000000Text-Capitalize ============================ Text::Capitalize provides functions to transform lines of text into presentable titles (i.e. it does "title casing"). The most important function here is "capitalize_title". It is exported by default. There is also an optional "scramble_case" function which is designed to produce a "wEiRD eFFeCt". Version 1.2 should work on perl 5.6 (there were problems in the tests). Version 1.1 improves the speed of the main capitalize_title routine. 
Version 1.0 just fixed test cases with international characters
(switching from latin-1 to utf-8, etc).

Version 0.9 primarily provided some documentation revisions,
and the switch to using Module::Build.

INSTALLATION

To install this module, run the following commands:

    perl Build.PL
    ./Build
    ./Build test
    ./Build install

DEPENDENCIES

This module requires perl 5.6 or later and uses only modules that are
standard with perl (Carp, Exporter).

The "use locale" pragma was required by earlier releases; it is no
longer used as of version 1.0.

Test::More is required to run "./Build test".

SUPPORT AND DOCUMENTATION

After installing, you can find documentation for this module with the
perldoc command.

    perldoc Text::Capitalize

You can also look for information at:

    RT, CPAN's request tracker
        http://rt.cpan.org/NoAuth/Bugs.html?Dist=Text-Capitalize

    AnnoCPAN, Annotated CPAN documentation
        http://annocpan.org/dist/Text-Capitalize

    CPAN Ratings
        http://cpanratings.perl.org/d/Text-Capitalize

    Search CPAN
        http://search.cpan.org/dist/Text-Capitalize/

COPYRIGHT AND LICENCE

Copyright (C) 2003, 2010 Joseph Brenner

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.
Text-Capitalize-1.5/lib000755001750001750 013543270756 14227 5ustar00doomdoom000000000000Text-Capitalize-1.5/lib/Text000755001750001750 013543270756 15153 5ustar00doomdoom000000000000Text-Capitalize-1.5/lib/Text/Capitalize.pm000444001750001750 5657013543270756 17770 0ustar00doomdoom000000000000
package Text::Capitalize;

=head1 NAME

Text::Capitalize - capitalize strings ("to WORK AS titles" becomes "To Work as Titles")

=head1 SYNOPSIS

   use Text::Capitalize;

   print capitalize( "...and justice for all" ), "\n";
      ...And Justice For All

   print capitalize_title( "...and justice for all" ), "\n";
      ...And Justice for All

   print capitalize_title( "agent of SFPUG", PRESERVE_ALLCAPS=>1 ), "\n";
      Agent of SFPUG

   print capitalize_title( "the ring:  symbol or cliche?",
                           PRESERVE_WHITESPACE=>1 ), "\n";
      The Ring:  Symbol or Cliche?
      (Note, double-space after colon is still there.)

   # To work on international characters, may need to set locale
   use Env qw( LANG );
   $LANG = "en_US";
   print capitalize_title( "über maus" ), "\n";
      Über Maus

   use Text::Capitalize qw( scramble_case );
   print scramble_case( 'It depends on what you mean by "mean"' );
      It dEpenDS On wHAT YOu mEan by "meAn".

=head1 ABSTRACT

Text::Capitalize is for capitalizing strings in a manner
suitable for use in titles.

=head1 DESCRIPTION

Text::Capitalize provides some routines for B<title-like>
formatting of strings.

The simple B<capitalize> function just makes the initial character
of each word uppercase, and forces the rest to lowercase.

The B<capitalize_title> function applies English title case rules
(discussed below) where only the "important" words are supposed
to be capitalized.  There are also some customization features
provided to allow the user to choose variant rules.
Comparing B<capitalize> and B<capitalize_title>:

  Input:             "lost watches of splitsville"
  capitalize:        "Lost Watches Of Splitsville"
  capitalize_title:  "Lost Watches of Splitsville"

Some examples of formatting with B<capitalize_title>:

  Input:             "KiLLiNG TiMe"
  capitalize_title:  "Killing Time"

  Input:             "we have come to wound the autumnal city"
  capitalize_title:  "We Have Come to Wound the Autumnal City"

  Input:             "ask for whom they ask for"
  capitalize_title:  "Ask for Whom They Ask For"

Text::Capitalize also provides some functions for special effects
such as B<scramble_case>, which typically would be used for this
sort of transformation:

  Input:            "get whacky"
  scramble_case:    "gET wHaCkY"  (or something similar)

=head1 EXPORTS

=head2 default exports

=over

=cut

use 5.006;
use strict;
use warnings;
use utf8;
# use locale;
use Carp;
use Exporter;

# Package globals are declared with "our" (available since perl 5.6,
# which this module already requires) rather than the obsolete "use vars".
our $DEBUG = 0;

our @ISA    = qw(Exporter);
our @EXPORT = qw(capitalize capitalize_title);

# NOTE(review): capitalize_title_original is offered for export, but no
# sub of that name is defined in this module as shown -- confirm whether
# it should be dropped from the list.
our @EXPORT_OK = qw(@exceptions
                    %defaults_capitalize_title
                    scramble_case
                    random_case
                    zippify_case
                    capitalize_title_original
                   );
our $VERSION = '1.5';

# Define the pattern to match "exceptions": the minor words
# that don't usually get capitalized in titles (used by capitalize_title)
our @exceptions = qw(
     a an the
     and or nor for but so yet
     to of by at for but in with has
     de von
  );

# Define the default arguments for the capitalize_title function
our %defaults_capitalize_title = (
    PRESERVE_WHITESPACE => 0,
    PRESERVE_ALLCAPS    => 0,
    PRESERVE_ANYCAPS    => 0,
    NOT_CAPITALIZED     => \@exceptions,
);

# Defining patterns to match "words" and "sentences" (used by capitalize_title)
our $word_rule = qr{
     ([^\w\s]*)   # $1 - leading punctuation
                  #   (e.g. ellipsis, leading apostrophe)
     ([\w']*)     # $2 - the word itself (includes non-leading apostrophes)
     ([^\w\s]*)   # $3 - trailing punctuation
                  #   (e.g. comma, ellipsis, period)
     (\s*)        # $4 - trailing whitespace
                  #   (usually " ", though at EOL prob "")
  }x ;

# Pieces for the $sentence_rule
our $anything   = qr{.*?};
our $ellipsis   = qr{\Q...};
our $dot        = qr{\Q.};
our $qmark      = qr{\Q?};
our $emdash     = qr{\Q--};
our $terminator = qr{$ellipsis|$dot|$qmark|!|:|$emdash|$};
our $ws         = qr{\s*};

our $sentence_rule = qr{
     (
       $anything       # anything up to...
       $terminator     # any sentence terminator (*or* the EOS)
       $ws             # trailing whitespace, if any
     )                 # all captured to $1
  }ox;

=item capitalize

Makes the initial character of each word uppercase, and forces
the rest to lowercase.  A word that is already entirely uppercase
is passed through unchanged.

The original routine by Stanislaw Y. Pusep.

=cut

# capitalize( $string )
#
# Returns a copy of $string with each chunk between word boundaries
# rewritten: chunks that are unchanged by uc() (all-uppercase words, and
# also whitespace and punctuation) are kept as they are, anything else
# is lowercased and then given an uppercase first character.
sub capitalize {
    my $text = shift;
    $text =~ s{\b(.*?)\b}{ $1 eq uc $1 ? $1 : "\u\L$1" }ge;
    return $text;
}

=item capitalize_title

Applies English title case rules (See L</BACKGROUND>) where only the
"important" words are supposed to be capitalized.

The one required argument is the string to be capitalized.

Some customization options may be passed in as pairs of names and
values following the required argument.

The following customizations are allowed:

Boolean:  PRESERVE_WHITESPACE
          PRESERVE_ALLCAPS
          PRESERVE_ANYCAPS

Array reference: NOT_CAPITALIZED

See L</Customizing the Exceptions to Capitalization>.
=cut

# capitalize_title( $string, %options )
#
# Returns a title-cased copy of $string.  %options are name/value pairs
# laid over %defaults_capitalize_title; an unrecognized option name
# triggers a carp() but is otherwise ignored.
#
# The string is cut into "sentences" with $sentence_rule and each
# sentence into words with $word_rule.  The first and last word of every
# sentence are always capitalized; words on the NOT_CAPITALIZED list are
# lowercased; every other word is lowercased and then given an uppercase
# first character.
sub capitalize_title {
    my $string = shift;
    my %args   = (
        %defaults_capitalize_title,
        @_,    # caller-supplied option pairs override the defaults
    );

    # Checking for spelling errors in options
    foreach my $option ( keys %args ) {
        unless ( exists $defaults_capitalize_title{$option} ) {
            carp "Bad option $option\n";
        }
    }

    my $keep_ws       = $args{PRESERVE_WHITESPACE};
    my $keep_acronyms = $args{PRESERVE_ALLCAPS};
    my $keep_mixups   = $args{PRESERVE_ANYCAPS};

    # The exception list holds literal words, so each entry is escaped
    # before it is joined into the alternation: an entry containing a
    # regex metacharacter can then neither alter nor break the pattern.
    my $exceptions_or =
      join '|', map { quotemeta($_) } @{ $args{NOT_CAPITALIZED} };
    my $exception_rule = qr{^(?:$exceptions_or)$}i;

    my $new_string = '';

    ### Processing each sentence (titles can have multiple sentences)
    while ( $string =~ /$sentence_rule/g ) {
        my $sentence     = $1;
        my $new_sentence = '';

        # Each entry of @words is an aref describing one word and its
        # surroundings: [ $punct_leading, $word, $punct_trailing, $spc ]
        # (for cases such as "...and", "'em", "and...", "F.B.I.")
        my @words;
        while ( $sentence =~ /$word_rule/g ) {

            # The pattern also yields an empty match at the end of the
            # sentence; only keep matches that captured something.
            push @words, [ $1, $2, $3, $4 ]
              if ( $2 ne '' ) or $1 or $3 or ( $4 ne '' );
        }

        ### Processing each word
        my $last = $#words;
        for my $i ( 0 .. $last ) {
            my ( $punct_leading, $word, $punct_trailing, $spc ) =
              @{ $words[$i] };

            # The first word and the last are always capitalized
            my $is_edge = ( $i == 0 ) || ( $i == $last );

            # collapse whitespace
            $spc = ' ' if !$keep_ws && length($spc) > 0;

            # Words that are passed through untouched: any word with a
            # capital (e.g. "iMac") under PRESERVE_ANYCAPS, and all
            # uppercase words under PRESERVE_ALLCAPS.
            my $untouched =
                 ( $keep_mixups   && $word =~ m{[[:upper:]]} )
              || ( $keep_acronyms && $word =~ m{^[[:upper:]]+$} );

            if ($untouched) {
                # nothing to do
            }
            elsif ( $word =~ m{^[dl]'} ) {

                # Fugliness to get some French names to work,
                # e.g. "d'Alembert", "l'Hospital": capitalize the
                # character after the elided article...
                $word =~ s{ ^([dl]') (\w) }{ lc($1) . ucfirst($2) }iex;

                # ...but upcase first char too if first or last word
                $word = ucfirst($word) if $is_edge;
            }
            elsif ($is_edge) {
                $word = ucfirst( lc($word) );
            }
            elsif ( $word =~ m{$exception_rule} ) {
                $word = lc($word);
            }
            else {
                $word = ucfirst( lc($word) );
            }

            # Append word to the new sentence
            $new_sentence .= $punct_leading . $word . $punct_trailing . $spc;
        }    # end of per word for loop

        $new_string .= $new_sentence;
    }    # end of per sentence loop.

    # Delete leading/trailing spaces, unless preserving whitespace,
    # (Doing as final step to avoid dropping spaces *between* sentences.)
    unless ($keep_ws) {
        $new_string =~ s|^\s+||;
        $new_string =~ s|\s+$||;
    }

    return $new_string;
}    # end sub capitalize_title

=back

=head2 optional exports

=over

=item @exceptions

The list of minor words that don't usually get capitalized in
titles (used by L</capitalize_title>).  Defaults to:

     a an the
     and or nor for but so yet
     to of by at for but in with has
     de von

=item %defaults_capitalize_title

Defines the default arguments for the capitalize_title function
Initially, this is set-up to shut off the features
PRESERVE_WHITESPACE, PRESERVE_ALLCAPS and PRESERVE_ANYCAPS;
it also has L</@exceptions> as the NOT_CAPITALIZED list.

=item scramble_case

This routine provides a special effect: sCraMBliNg tHe CaSe

The algorithm here uses a modified probability distribution to get
a weirder looking effect than simple randomization such as with
L</random_case>.

For a discussion of the algorithm, see L</SPECIAL EFFECTS>.

=cut

# Instead of initializing $uppers, $downers to zero, using fudged
# initial counts to
#   (1) provide an initial bias against leading with uppercase,
#   (2) eliminate need to watch for division by zero on $tweak below.
# Rather than "int(rand(2))" which generates a 50/50 distribution of 0s and 1s,
# we're using "int(rand(1+$tweak))" where $tweak will
# provide a restoring force back to the average

# So here we want $tweak:
#    to go to 1 when you approach $uppers = $downers
#    to be larger than 1 if $downers > $uppers
#    to be less than 1 if $uppers > $downers
# A simple formula that does this:
#     $uppity = int( rand( 1 + $downers/$uppers) );
# The alternative (proposed by Randal Schwartz) is no real speed improvement:
#     $uppity = rand( $uppers + $downers ) > $uppers;
# (though there are no worries about divide by zero there).

# Note that this benchmarks faster:
#    @chars = split //, $string;
# Than:
#    @chars = split /(?<=[[:alpha:]])/, $string;

# scramble_case( $string )
#
# Returns a copy of $string in which every character has been run through
# either ucfirst() or lc().  The choice is random, weighted by the ratio
# of the running tallies of "down" and "up" decisions made so far.
sub scramble_case {
    my $string = shift;

    # Fudged (non-zero) starting tallies, as explained above.
    my $uppers  = 2;
    my $downers = 1;

    my $scrambled = '';
    for my $char ( split //, $string ) {
        if ( int( rand( 1 + $downers / $uppers ) ) ) {
            $scrambled .= ucfirst($char);
            $uppers++;
        }
        else {
            $scrambled .= lc($char);
            $downers++;
        }
    }

    return $scrambled;
}

=item random_case

Randomizes the case of each character with a 50-50 chance
of each one becoming upper or lower case.

=cut

# random_case( $string )
#
# Returns a copy of $string in which each character has independently
# been passed through uc() or lc(), chosen by int( rand(2) ).
sub random_case {
    my $string = shift;

    return join '',
      map { int( rand(2) ) ? uc($_) : lc($_) }    # simple, 50-50 random pick
      split //, $string;
}

=item zippify_case

Function to provide a special effect: "RANDOMLY upcasing WHOLE WORDS at a TIME".

This uses a similar algorithm to L</scramble_case>, though it also
ignores words on the L</@exceptions> list, just as L</capitalize_title> does.
=cut

# zippify_case( $string )
#
# Returns a copy of $string in which whole words have been randomly
# converted to all-uppercase or all-lowercase.  The string is split on
# word boundaries, so the pieces between words (spaces, punctuation) are
# tokens as well and take part in the same random choice.  Tokens that
# equal an entry of @exceptions (compared case-insensitively) are left
# exactly as they were and do not affect the tallies.
sub zippify_case {
    my $string = shift;

    # Lookup table for the minor words.  The comparison has to be whole
    # word against whole word: matching the exception against the word as
    # a pattern would also skip any word that merely occurs inside an
    # exception ("it" in "with", "he" in "the", ...).
    my %is_exception = map { ( lc($_) => 1 ) } @exceptions;

    my @words = split /\b/, $string;

    # Starting tallies biased toward lowercase, so that upcased words
    # stay the minority.
    my $uppers  = 1;
    my $downers = 5;

    foreach my $word (@words) {
        next if $is_exception{ lc($word) };

        # a modified "random" distribution with fewer "streaks" than normal.
        if ( int( rand( 1 + $downers / $uppers ) ) ) {
            $word = uc($word);    # the WHOLE word, not just its first letter
            $uppers++;
        }
        else {
            $word = lc($word);
            $downers++;
        }
    }

    return join '', @words;
}

1;

=back

=head1 BACKGROUND

The capitalize_title function tries to do the right thing by
default: adjust an arbitrary chunk of text so that it can be used
as a title.  But as with many aspects of the human languages, it
is extremely difficult to come up with a set of programmatic
rules that will cover all cases.

=head2 Words that don't get capitalized

This web page:

  http://www.continentallocating.com/World.Literature/General2/LiteraryTitles2.htm

presents some admirably clear rules for capitalizing titles:

  ALL words in EVERY title are capitalized except
  (1) a, an, and the,
  (2) two and three letter conjunctions (and, or, nor, for, but, so, yet),
  (3) prepositions.
  Exceptions:  The first and last words are always capitalized even
  if they are among the above three groups.

But consider the case:

  "It Waits Underneath the Sea"

Should the word "underneath" be downcased because it's a preposition?
Most English speakers would be surprised to see it that way.
Consequently, the default list of exceptions to capitalization in this module
only includes the shortest of the common prepositions (to of by at for but in).

The default entries on the exception list are:

     a an the
     and or nor for but so yet
     to of by at for but in with has
     de von

The observant may note that the last row is not composed of English
words.  The honorary "de" has been included in honor of "Honoré de
Balzac".  And "von" was added for the sake of equal time.
=head2 Customizing the Exceptions to Capitalization If you have different ideas about the "rules" of English (or perhaps if you're trying to use this code with another language with different rules) you might like to substitute a new exception list of your own: capitalize_title( "Dude, we, like, went to Old Slavy, and uh, they didn't have it", NOT_CAPITALIZED => [ qw( uh duh huh wha like man you know ) ] ); This should return: Dude, We, like, Went To Old Slavy, And uh, They Didn't Have It Less radically, you might like to simply add a word to the list, for example "from": use Text::Capitalize 0.2 qw( capitalize_title @exceptions ); push @exceptions, "from"; print capitalize_title( "fungi from yuggoth", NOT_CAPITALIZED => \@exceptions); This should output: Fungi from Yuggoth =head2 All Uppercase Words In order to work with a wide range of input strings, by default capitalize_title presumes that upper-case input needs to be adjusted (e.g. "DOOM APPROACHES!" would become "Doom Approaches!"). But, this doesn't allow for the possibilities such as an acronym in a title (e.g. "RAM Prices Plummet" ideally should not become "Ram Prices Plummet"). If the PRESERVE_ALLCAPS option is set, then it will be presumed that an all-uppercase word is that way for a reason, and will be left alone: print capitalize_title( "ram more RAM down your throat", PRESERVE_ALLCAPS => 1 ); This should output: Ram More RAM Down Your Throat =head2 Preserving Any Usage of Uppercase for Mixed-case Words There are some other odd cases that are difficult to handle well, notably mixed-case words such as "iMac", "CHiPs", and so on. For these purposes, a PRESERVE_ANYCAPS option has been provided which presumes that any usage of uppercase is there for a reason, in which case the entire word should be passed through untouched. 
With PRESERVE_ANYCAPS on, only the case of all lowercase words will ever be adjusted: print capitalize_title( "TLAs i have known and loved", PRESERVE_ANYCAPS => 1 ); This should output: TLAs I Have Known and Loved print capitalize_title( "the next iMac: just another NeXt?", PRESERVE_ANYCAPS => 1); This should output: The Next iMac: Just Another NeXt? =head2 Handling Whitespace By default, the capitalize_title function presumes that you're trying to clean up potential title strings. As an extra feature it collapses multiple spaces and tabs into single spaces. If this feature doesn't seem desirable and you want it to literally restrict itself to adjusting capitalization, you can force that behavior with the PRESERVE_WHITESPACE option: print capitalize_title( "it came from texas: the new new world order?", PRESERVE_WHITESPACE => 1); This should output: It Came From Texas: The New New World Order? (Note: the double-space after the colon is still there.) =head2 Comparison to Text::Autoformat As you might expect, there's more than one way to do this, and these two pieces of code perform very similar functions: use Text::Capitalize 0.2; print capitalize_title( $t ), "\n"; use Text::Autoformat; print autoformat { case => "highlight", right => length( $t ) }, $t; Note: with autoformat, supplying the length of the string as the "right margin" is much faster than plugging in an arbitrarily large number. There doesn't seem to be any other way of turning off line-breaking (e.g. by using the "fill" parameter) though possibly there will be in the future. As of this writing, "capitalize_title" has some advantages: =over =item 1. It works on characters outside the English 7-bit ASCII range, for example with my locale setting (en_US) the ISO-8859-1 International characters are handled correctly, so that "über maus" becomes "Über Maus". =item 2. Minor words following leading punctuation become upper case: "...And Justice for All" =item 3. It works with multiple sentence input (e.g. 
"And sooner. And later." should probably not be "And sooner. and later.") =item 4. The list of minor words is more extensive (i.e. includes: so, yet, nor), and is also customizable. =item 5. There's a way of preserving acronyms via the PRESERVE_ALLCAPS option and similarly, mixed-case words ("iMac", "NeXt", etc") with the PRESERVE_ANYCAPS option. =item 6. capitalize_title is roughly ten times faster. =back Another difference is that Text::Autoformat's "highlight" always preserves whitespace something like capitalize_title does with the PRESERVE_WHITESPACE option set. However, it should be pointed out that Text::Autoformat is under active maintenance by Damian Conway. It also does far more than this module, and you may want to use it for other reasons. =head2 Still more ways to do it Late breaking news: The second edition of the Perl Cookbook has just come out. It now includes: "Properly Capitalizing a Title or Headline" as recipe 1.14. You should familiarize yourself with this if you want to become a true master of all title capitalization routines. (And I see that recipe 1.13 includes a "randcap" program as an example, which as it happens does something like the random_case function described below...) =head1 SPECIAL EFFECTS Some functions have been provided to make strings look weird by scrambling their capitalization ("lIKe tHiS"): random_case and scramble_case. The function "random_case" does a straight-forward randomization of capitalization so that each letter has a 50-50 chance of being upper or lower case. The function "scramble_case" performs a very similar function, but does a slightly better job of producing something "weird-looking". The difficulty is that there are differences between human perception of randomness and actual randomness. Consider the fact that of the sixteen ways that the four letter word "word" can be capitalized, three of them are rather boring: "word", "Word" and "WORD". 
To make it less likely that scramble_case will produce dull output when you want "weird" output, a modified probability distribution has been used that records the history of previous outcomes, and tweaks the likelihood of the next decision in the opposite direction, back toward the expected average. In effect, this simulates a world in which the Gambler's Fallacy is correct ("Hm... red has come up a lot, I bet that black is going to come up now."). "Streaks" are much less likely with scramble_case than with random_case. Additionally, with scramble_case the probability that the first character of the input string will become upper-case has been tweaked to less than 50%. (Future versions may apply this tweak on a per-word basis rather than just on a per-string basis). There is also a function that scrambles capitalization on a word-by-word basis called "zippify_case", which should produce output like: "In my PREVIOUS life i was a LATEX-novelty REPAIRMAN!" =head1 EXPORT By default, this version of the module provides the two functions capitalize and capitalize_title. Future versions will have no further additions to the default export list. Optionally, the following functions may also be exported: =over =item scramble_case A function to scramble capitalization in a wEiRD loOOkInG wAy. Supposed to look a little stranger than the simpler random_case output =item random_case Function to randomize capitalization of each letter in the string. Compare to "scramble_case" =item zippify_case A function like "scramble_case" that acts on a word-by-word basis (Somewhat LIKE this, YOU know?). =back It is also possible to export the following variables: =over =item @exceptions The list of minor words that capitalize_title uses by default to determine the exceptions to capitalization. =item %defaults-capitalize_title The hash of allowed arguments (with defaults) that the capitalize_title function uses. =back =head1 BUGS 1. 
In capitalize_title, quoted sentence terminators are treated as actual
sentence breaks, e.g. in this case:

     'say "yes but!" and "know what?"'

The program sees the ! and effectively treats this as two separate
sentences: the word "but" becomes "But" (under the rule that last
words must always be uppercase, even if they're on the exception
list) and the word "and" becomes "And" (under the first word rule).

2. There's no good way to automatically handle names like "McCoy".
Consider the difficulty of disambiguating "Macadam Roads" from
"MacAdam Rode".  If you need to solve problems like this, consider
using the case_surname function of Lingua::En::NameParse.

3. In general, Text::Capitalize is a very parochial English
oriented module that looks like it belongs in the "Lingua::En::*"
tree.

4. There's currently no way of doing a PRESERVE_ANYCAPS that
*also* adjusts capitalization of words on the exception list,
so that "iMac Or iPod" would become "iMac or iPod".

=head1 SEE ALSO

L<Text::Autoformat>

"The Perl Cookbook", second edition, recipes 1.13 and 1.14

L<Lingua::En::NameParse>

About "scramble_case": see L</SPECIAL EFFECTS>

=head1 VERSION

Version 1.5

=head1 AUTHORS

   Joseph M. Brenner
      E-Mail:   doom@kzsu.stanford.edu
      Homepage: http://obsidianrook.com/map

   Stanislaw Y. Pusep  (who wrote "capitalize")
      E-Mail:   stanis@linuxmail.org
      ICQ UIN:  11979567
      Homepage: http://sysdlabs.hypermart.net/

And many thanks (for feature suggestions and code examples) to:

    Belden Lyman, Yary Hcluhan, Randal Schwartz

=head1 COPYRIGHT AND LICENSE

Copyright 2003 by Joseph Brenner. All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut
Text-Capitalize-1.5/t000755001750001750 013543270756 13724 5ustar00doomdoom000000000000Text-Capitalize-1.5/t/001-load.t000444001750001750 53513543270756 15446 0ustar00doomdoom000000000000
# Before `make install' is performed this script should be runnable with
# `make test'.
After `make install' it should work as `perl 001-load.t' ######################### use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); use Test::More tests => 2; use_ok('Text::Capitalize'); use_ok("Test::Locale::Utils"); ######################### Text-Capitalize-1.5/t/002-captitle-default.t000444001750001750 2057313543270756 20023 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 002-captitle-default.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); use Text::Capitalize 0.4 qw( capitalize_title ); use Test::Locale::Utils qw( :all ); use Test::More; my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; plan tests => $total; my $builder = Test::More->builder; binmode $builder->output, ":encoding(utf-8)"; binmode $builder->failure_output, ":encoding(utf-8)"; binmode $builder->todo_output, ":encoding(utf-8)"; my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title( $case ); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title( $case ); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $test_name = "\$\_ unaffected by capitalize_title"; my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything); is ($_, $keeper, "$test_name"); } 
####### ### end main, into the subs # Hashref of test cases (keys) and expected results (values) for the # vanillia "capitalize_title" sub, without options. sub define_basic_test_cases { my %expect_capitalize_title_default = ( 'This And ThAt' => 'This and That', "Revenge is Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'Doodz I Am so There! Not.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'Awol in the Dmz of Wwiii', 'TLAs i have known and loved' => 'Tlas I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next Imac: Just Another Next?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of Sfpug', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your Key to Creativity', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'Poiksificizalationoryism', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => 'Very Spacey', ' ...huh? ' => '...Huh?', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the so-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'Beat! Beat! Drums!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_default; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_default_i18n = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_default_i18n; } Text-Capitalize-1.5/t/003-captitle-preserve_whitespace.t000444001750001750 2042113543270756 22437 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 002-capitalize_title-default.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; # use Test::More tests => 77; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw(capitalize_title); use Test::Locale::Utils qw(:all); my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything, PRESERVE_WHITESPACE => 1); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } # Hashref of test cases (keys) and expected results (values) for the # vanillia "capitalize_title" sub, without options. 
sub define_basic_test_cases { my %expect_capitalize_title_PRESERVE_WHITESPACE = ( 'This And ThAt' => 'This and That', "Revenge is Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'Doodz I Am so There! Not.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'Awol in the Dmz of Wwiii', 'TLAs i have known and loved' => 'Tlas I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next Imac: Just Another Next?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of Sfpug', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your Key to Creativity ', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'Poiksificizalationoryism', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => ' Very Spacey ', ' ...huh? ' => ' ...Huh? ', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the so-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'Beat! Beat! Drums!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_PRESERVE_WHITESPACE; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_PRESERVE_WHITESPACE = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_PRESERVE_WHITESPACE; } Text-Capitalize-1.5/t/004-captitle-preserve_allcaps.t000444001750001750 2020213543270756 21720 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 002-capitalize_title-default.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; # use Test::More tests => 77; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw(capitalize_title); use Test::Locale::Utils qw(:all); my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ALLCAPS => 1); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ALLCAPS => 1); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything, PRESERVE_ALLCAPS => 1); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } sub define_basic_test_cases { my %expect_capitalize_title_PRESERVE_ALLCAPS = ( 'This And ThAt' => 'This and That', "Revenge is 
Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'DOODZ I AM SO THERE! NOT.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'AWOL in the DMZ of WWIII', 'TLAs i have known and loved' => 'Tlas I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next Imac: Just Another Next?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of SFPUG', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your KEY to Creativity', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'Poiksificizalationoryism', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => 'Very Spacey', ' ...huh? ' => '...Huh?', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the so-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'BEAT! BEAT! DRUMS!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_PRESERVE_ALLCAPS; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_PRESERVE_ALLCAPS_i18n = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_PRESERVE_ALLCAPS_i18n; } Text-Capitalize-1.5/t/005-captitle-preserve_anycaps.t000444001750001750 2020313543270756 21741 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as 'perl 005-captitle-preserve_anycaps.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; # use Test::More tests => 77; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw(capitalize_title); use Test::Locale::Utils qw(:all); my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ANYCAPS => 1); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ANYCAPS => 1); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything, PRESERVE_ANYCAPS => 1); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } sub define_basic_test_cases { my %expect_capitalize_title_PRESERVE_ANYCAPS = ( 'This And ThAt'=> 'This And ThAt', "Revenge 
is Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'DOODZ I AM SO THERE! NOT.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'AWOL in the DMZ of WWIII', 'TLAs i have known and loved' => 'TLAs I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next iMac: Just Another NeXt?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of SFPUG', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your KEY to Creativity', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'pOiksIFiciZaLaTIonoRyISM', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => 'Very Spacey', ' ...huh? ' => '...Huh?', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the So-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'BEAT! BEAT! DRUMS!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_PRESERVE_ANYCAPS; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_PRESERVE_ANYCAPS_i18n = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_PRESERVE_ANYCAPS_i18n; } Text-Capitalize-1.5/t/006-captitle-preserve_allcaps_and_whitespace.t000444001750001750 2117413543270756 24771 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 002-capitalize_title-default.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; # use Test::More tests => 77; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw(capitalize_title); use Test::Locale::Utils qw(:all); my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title( $case, PRESERVE_ALLCAPS => 1, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title( $case, PRESERVE_ALLCAPS => 1, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything, PRESERVE_ALLCAPS => 1, PRESERVE_WHITESPACE => 1); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } ####### ### end main, into the 
subs # Hashref of test cases (keys) and expected results (values) for the # vanillia "capitalize_title" sub, without options. sub define_basic_test_cases { my %expect_capitalize_title_PRESERVE_ALLCAPS_PRESERVE_WHITESPACE = ( 'This And ThAt' => 'This and That', "Revenge is Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'DOODZ I AM SO THERE! NOT.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'AWOL in the DMZ of WWIII', 'TLAs i have known and loved' => 'Tlas I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next Imac: Just Another Next?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of SFPUG', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your KEY to Creativity ', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'Poiksificizalationoryism', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => ' Very Spacey ', ' ...huh? ' => ' ...Huh? ', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the so-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'BEAT! BEAT! DRUMS!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_PRESERVE_ALLCAPS_PRESERVE_WHITESPACE; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_PRESERVE_ALLCAPS_PRESERVE_WHITESPACE = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_PRESERVE_ALLCAPS_PRESERVE_WHITESPACE; } Text-Capitalize-1.5/t/007-captitle-preserve_anycaps_and_whitespace.t000444001750001750 2113213543270756 25003 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 002-capitalize_title-default.t' ######################### use warnings; use strict; $|=1; use utf8; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; # use Test::More tests => 77; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw(capitalize_title); use Test::Locale::Utils qw(:all); my $i18n_system = is_locale_international(); { foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ANYCAPS => 1, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title($case, PRESERVE_ANYCAPS => 1, PRESERVE_WHITESPACE => 1); is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; capitalize_title($anything, PRESERVE_ANYCAPS => 1, PRESERVE_WHITESPACE => 1); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } ####### ### end 
main, into the subs # Hashref of test cases (keys) and expected results (values) for the # vanillia "capitalize_title" sub, without options. sub define_basic_test_cases { my %expect_capitalize_title_PRESERVE_ANYCAPS_PRESERVE_WHITESPACE = ( 'This And ThAt' => 'This And ThAt', "Revenge is Doom's" => "Revenge Is Doom's", 'the end of the dream: three-holed button manufacture in a four-holed world' => 'The End of the Dream: Three-Holed Button Manufacture in a Four-Holed World', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'Chords Against Culture -- Counter-Sexist Themes in the Later Works of Fetal Tissue Kleenex', 'a history of n.a.s.a.' => 'A History of N.A.S.A.', 'the n.a.s.a. sucks rag' => 'The N.A.S.A. Sucks Rag', 's.a.d. days t.a.n. shades' => 'S.A.D. Days T.A.N. Shades', 'it\'s the man\'s, you know?' => 'It\'s the Man\'s, You Know?', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'Hey Doc the Ticker Is Hocked, the Dial Is Locked, the Face Is Botoxed, Whazzup?', 'Hell\'s Swells' => 'Hell\'s Swells', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'You\'re Wrong, It Doesn\'t Fly, It\'s Not There and They\'re Lost, so You\'d Better Not', 'DOODZ I AM SO THERE! NOT.' => 'DOODZ I AM SO THERE! NOT.', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis Called Perserverence in a Good Cause, and Obstinacy in a Bad One.', 'And the rest is silence...' => 'And the Rest Is Silence...', 'a brief history of the word of' => 'A Brief History of the Word Of', 'AWOL in the DMZ of WWIII' => 'AWOL in the DMZ of WWIII', 'TLAs i have known and loved' => 'TLAs I Have Known and Loved', 'The Next iMac: Just Another NeXt?' => 'The Next iMac: Just Another NeXt?', 'Mr. Wong and Dr. And Report' => 'Mr. Wong and Dr. 
And Report', 'Quinn Weaver, agent of SFPUG' => 'Quinn Weaver, Agent of SFPUG', 'sarcasm, yet' => 'Sarcasm, Yet', 'sarcasm yet not humor' => 'Sarcasm yet Not Humor', '...and justice for all' => '...And Justice for All', 'kill \'em all' => 'Kill \'Em All', 'history of the gort-verada-nictu moving company' => 'History of the Gort-Verada-Nictu Moving Company', 'Erratic spacing: your KEY to creativity ' => 'Erratic Spacing: Your KEY to Creativity ', 'it came from texas: the new new world order?' => 'It Came From Texas: The New New World Order?', 'pOiksIFiciZaLaTIonoRyISM' => 'pOiksIFiciZaLaTIonoRyISM', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'What About: A an the and or nor for but so yet Not to of by at for but in, Huh?', 'Ah ha: and so forth' => 'Ah Ha: And so Forth', 'a theory I have' => 'A Theory I Have', 'and/or testified it shall be' => 'And/or Testified It Shall Be', '...nor lost, nor found' => '...Nor Lost, nor Found', 'Ask not' => 'Ask Not', '\'for not!\', he said.' => '\'For Not!\', He Said.', '"but so!", sayeth I' => '"But So!", Sayeth I', 'The wind whispers "But!"' => 'The Wind Whispers "But!"', 'say "but!", say what?' => 'Say "But!", Say What?', 'yet by and by but in for to' => 'Yet by and by but in for To', '-- ack, ack, bang!' => '-- Ack, Ack, Bang!', ' very spacey ' => ' Very Spacey ', ' ...huh? ' => ' ...Huh? ', 'Baron von Arnheim\'s revenge' => 'Baron von Arnheim\'s Revenge', 'forget gilroy, A. Snakhausem was here' => 'Forget Gilroy, A. Snakhausem Was Here', 'The 13 Clocks' => 'The 13 Clocks', 'The 4 False Weapons' => 'The 4 False Weapons', '10 Little-Endians' => '10 Little-Endians', 'the dirty 27' => 'The Dirty 27', 'machine13' => 'Machine13', 'ice9count0' => 'Ice9count0', 'Why? Well, why not?' => 'Why? Well, Why Not?', 'Ping... ping... ping... pong!' => 'Ping... Ping... Ping... Pong!', 'Document. Test. Code. Repeat.' => 'Document. Test. Code. Repeat.', 'And so they tramped on through the night. Tramp. 
Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...' => 'And so They Tramped On Through the Night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...', 'And more. And still more.' => 'And More. And Still More.', 'mo\' beta-testing' => 'Mo\' Beta-Testing', 'a laboratory of the open fields' => 'A Laboratory of the Open Fields', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'Scientific Study of the So-Called Psychical Processes in the Higher Animals', 'The Running-Down of the Universe' => 'The Running-Down of the Universe', 'In the beginning... was the global-set-key' => 'In the Beginning... Was the Global-Set-Key', 'how should one read a book?' => 'How Should One Read a Book?', 'of beauty' => 'Of Beauty', 'on style' => 'On Style', 'As I Ebb\'d with the Ocean of Life' => 'As I Ebb\'d with the Ocean of Life', 'When I Heard the Learn\'d Astronomer' => 'When I Heard the Learn\'d Astronomer', 'From Pent-Up Aching Rivers' => 'From Pent-Up Aching Rivers', 'One\'s Self I Sing' => 'One\'s Self I Sing', 'BEAT! BEAT! DRUMS!' => 'BEAT! BEAT! DRUMS!', 'The Wound-Dresser' => 'The Wound-Dresser', 'Pain--has an Element of Blank' => 'Pain--Has an Element of Blank', '' => '', ); return \%expect_capitalize_title_PRESERVE_ANYCAPS_PRESERVE_WHITESPACE; } sub define_basic_test_cases_i18n { my %expect_capitalize_title_PRESERVE_ANYCAPS_PRESERVE_WHITESPACE = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'Didaktische Überlegungen/Erfahrungsbericht Über Den Computereinsatz Im Geisteswissenschaftlichen Unterricht Am Bsp. "Historische Zeitung"', 'über maus' => 'Über Maus', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'Explicación Dél Significado de Los Términos Utilizados En "Don Quijote", Por Capítulo.', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', ); return \%expect_capitalize_title_PRESERVE_ANYCAPS_PRESERVE_WHITESPACE; } Text-Capitalize-1.5/t/008-random_case.t000444001750001750 2203213543270756 17045 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 008-random_case.t' ######################### use warnings; use strict; $|=1; use utf8; use Env qw( $HOME ); my $TESTCASEGEN = 0; open my $TFH, ">", "$HOME/End/Cave/CapitalizeTitle/tmp/tempoutput.$$" or die $! if $TESTCASEGEN; use FindBin qw($Bin); use lib ("$Bin/../lib", "$Bin/lib"); my $basic_test_cases = define_basic_test_cases(); my $i18n_test_cases = define_basic_test_cases_i18n(); my $basic_count = scalar( keys( %{ $basic_test_cases } ) ); my $i18n_count = scalar( keys( %{ $i18n_test_cases } ) ); my $total = $basic_count + $i18n_count + 1; use Test::More; plan tests => $total; use Text::Capitalize 0.4 qw( random_case ); use Test::Locale::Utils qw( :all ); my $i18n_system = is_locale_international(); { # seeding with a known value to get repeatable sequence from rand srand(666); # Note: need to sort the test cases, to get the same order that # was used in generating the answer key. 
foreach my $case (sort keys %{ $basic_test_cases }) { my $expected = $basic_test_cases->{ $case }; my $result = random_case( $case ); record_testcase( $case, $result ) if $TESTCASEGEN; is ($result, $expected, "test: $case"); } SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{$case}; my $result = random_case($case); record_testcase( $case, $result ) if $TESTCASEGEN; is ($result, $expected, "test: $case"); } } } # Regression test: make sure $_ isn't munged by unlocalized use { my $anything = "Whirl and Pieces"; my $keeper = "abc123"; local $_ = $keeper; random_case($anything); is ($_, $keeper, "\$\_ unaffected by capitalize_title"); } ####### ### end main, into the subs # I need to have test case generator code embedded in this *.t # A stand-alone script didn't work... # Something odd about using srand for a repeatable rand sequence? sub record_testcase { my $in = shift; my $out = shift; $in =~ s{'}{\\'}g; $out =~ s{'}{\\'}g; print {$TFH} " '$in' =>\n '$out',\n"; } # Hash of test cases (keys) and expected results (values) for # random_case, when seeded with a known value: srand(666) sub define_basic_test_cases { my %expect_random_case = ( '' => '', ' ...huh? ' => ' ...HUh? ', ' very spacey ' => ' vEry spaCey ', '"but so!", sayeth I' => '"bUt so!", SAyEth I', '\'for not!\', he said.' => '\'FoR NOT!\', hE SAid.', '-- ack, ack, bang!' => '-- ACk, ACK, bAng!', '...and justice for all' => '...aNd JUsTIce fOr All', '...nor lost, nor found' => '...nOr lOsT, noR FOuNd', '10 Little-Endians' => '10 LIttle-ENDiANS', 'AWOL in the DMZ of WWIII' => 'Awol in THE dmZ OF WwiiI', 'Ah ha: and so forth' => 'AH ha: AND SO ForTh', 'And more. And still more.' => 'And morE. ANd STILL more.', 'And so they tramped on through the night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...' => 'AnD SO ThEY TRAmpEd on tHROUGh tHe NIghT. trAmp. TramP. TramP. TRAMP. 
Tramp. TramP. trAMp...', 'And the rest is silence...' => 'AnD THe Rest Is SIlEnce...', 'As I Ebb\'d with the Ocean of Life' => 'as i EBb\'D wITH ThE OCeaN of life', 'Ask not' => 'asK nOT', 'BEAT! BEAT! DRUMS!' => 'beaT! beAt! DRUMs!', 'Baron von Arnheim\'s revenge' => 'bARon vON aRNHEim\'s rEvEngE', 'DOODZ I AM SO THERE! NOT.' => 'DoODz i Am so tHEre! NOT.', 'Document. Test. Code. Repeat.' => 'DoCUMeNt. tESt. CoDE. RePeAt.', 'Erratic spacing: your KEY to creativity ' => 'eRRATiC spaciNg: youR keY tO CReATIVity ', 'From Pent-Up Aching Rivers' => 'FrOm Pent-UP acHInG rIvers', 'Hell\'s Swells' => 'HELL\'s SWells', 'In the beginning... was the global-set-key' => 'In tHE BeGInNinG... wAs tHe GlobAL-SeT-Key', 'Mr. Wong and Dr. And Report' => 'Mr. wong aND dr. AnD rEPoRt', 'One\'s Self I Sing' => 'oNE\'S sElF I SiNG', 'Pain--has an Element of Blank' => 'Pain--HAS An eLeMeNT of bLANK', 'Ping... ping... ping... pong!' => 'PING... pINg... PInG... ponG!', 'Quinn Weaver, agent of SFPUG' => 'QuINN wEavER, AGeNt Of sFPUg', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'SciENtiFiC sTUDy oF THE sO-cAlLEd PsYChiCAl pRocESSes In tHE HIGHER AnImAls', 'TLAs i have known and loved' => 'TLaS i HAVe kNOwn anD loVed', 'The 13 Clocks' => 'THE 13 clOCKs', 'The 4 False Weapons' => 'THe 4 falsE weApons', 'The Next iMac: Just Another NeXt?' => 'the nexT ImAC: JUst AnotHer NexT?', 'The Running-Down of the Universe' => 'tHe RunNinG-Down OF ThE uniVERSE', 'The Wound-Dresser' => 'thE WoUNd-dREsseR', 'The wind whispers "But!"' => 'thE WinD whISPErs "bUT!"', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis CALLED persERveRenCE IN A GoOd CauSE, anD oBStINaCY iN A BAD onE.', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'wHAT AbOuT: A AN The AND Or nOr FOR But sO yet NoT TO OF by aT FOR BuT In, HUh?', 'When I Heard the Learn\'d Astronomer' => 'wHEn I hEARD tHE LEarn\'d AstRoNomER', 'Why? 
Well, why not?' => 'WHY? WElL, WhY NoT?', 'a brief history of the word of' => 'a brIeF hIStORY Of THe wORd of', 'a history of n.a.s.a.' => 'A History OF N.a.S.A.', 'a laboratory of the open fields' => 'a lAbOratory OF tHE open fIELds', 'a theory I have' => 'A thEORY I hAvE', 'and/or testified it shall be' => 'anD/Or TeStifieD it shAll Be', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'cHORDS aGAINST culTurE -- cOUNtEr-SExIsT themEs In tHE LATer Works of FETAL TIsSuE KLEenEx', 'forget gilroy, A. Snakhausem was here' => 'ForgEt GilrOy, a. sNakhaUSeM WAS herE', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'hey DOC THE TIcKEr is hocked, THE DIAL Is lOckeD, The face IS Botoxed, WhazZuP?', 'history of the gort-verada-nictu moving company' => 'historY Of the goRt-veRada-NICtU MoViNG CompANy', 'how should one read a book?' => 'hOW shouLd ONe ReaD A bOoK?', 'ice9count0' => 'ICe9cOUNt0', 'it came from texas: the new new world order?' => 'It cAMe fRoM TEXAS: thE NEw NEW WORlD oRdeR?', 'it\'s the man\'s, you know?' => 'it\'S ThE Man\'S, you KNOW?', 'kill \'em all' => 'KIlL \'em ALl', 'machine13' => 'machiNe13', 'mo\' beta-testing' => 'MO\' beta-TeStiNg', 'of beauty' => 'OF BEAutY', 'on style' => 'On stYle', 'pOiksIFiciZaLaTIonoRyISM' => 'pOiKsIfiCIzaLaTiOnoRyiSM', 's.a.d. days t.a.n. shades' => 's.A.d. DaYs T.a.N. shadEs', 'sarcasm yet not humor' => 'sARCasm YET NoT hUMor', 'sarcasm, yet' => 'sARCAsM, yeT', 'say "but!", say what?' => 'saY "but!", SAy WhAt?', 'the dirty 27' => 'the dirTy 27', 'the end of the dream: three-holed button manufacture in a four-holed world' => 'tHE eNd Of ThE drEAm: THree-holeD BUtTON MANUFAcTUre In A FoUR-HoLED WorLD', 'the n.a.s.a. sucks rag' => 'THE N.a.s.A. 
SucKs RAg', 'yet by and by but in for to' => 'yET bY AND By BUT in For tO', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'yoU\'RE WronG, IT DoesN\'t flY, IT\'S nOt tHEre and THEY\'RE LOST, sO You\'d betTeR NoT', ); return \%expect_random_case; }; sub define_basic_test_cases_i18n { my %expect_random_case = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'DIdaKtISchE ÜbERLeGUngEN/ErFahrUnGsbERIcht üBer DeN CoMpUtEREInsATz im GeIsTesWissENscHAFTlIchEN unTERRicHt AM BSP. "HISToRIsChe zEITuNg"', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' => 'explIcAciÓn dél SIgniFIcAdo De loS tÉRmiNoS UtiLiZadoS en "don QUIJoTE", pOr CAPÍtULO.', 'où l\'on découvre une époque à travers l\'oeuvre imposante d\'Honoré de Balzac' => 'oÙ l\'on déCouVRE uNE ÉPOQUe à TraVErs l\'OeuvRE iMPOSAnte d\'hONorÉ De bAlzac', 'évêque, qu\'il eût aimé voir infliger à ceux qui ont abdiqué, J\'ai été reçu, and pepe le peau' => 'évÊQue, Qu\'iL eûT AIMÉ vOiR inflIgER à ceux quI onT AbdiQué, J\'aI éTé REçU, and pepe lE peaU', 'über maus' => 'ÜbeR mAuS', ); return \%expect_random_case; } Text-Capitalize-1.5/t/009-scramble_case.t000444001750001750 2211613543270756 17361 0ustar00doomdoom000000000000# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 009-scramble_case.t' ######################### use warnings; use strict; $|=1; use Env qw( $HOME ); use utf8; my $TESTCASEGEN = 0; open my $TFH, ">", "$HOME/End/Cave/CapitalizeTitle/tmp/tempoutput.$$" or die $! 
if $TESTCASEGEN;    # tail of the answer-key open() statement begun just above

use FindBin qw($Bin);
use lib ("$Bin/../lib", "$Bin/lib");

# The tables of inputs and expected outputs are built by subs defined at
# the bottom of this file.
my $basic_test_cases = define_basic_test_cases();
my $i18n_test_cases  = define_basic_test_cases_i18n();

my $basic_count = scalar( keys( %{ $basic_test_cases } ) );
my $i18n_count  = scalar( keys( %{ $i18n_test_cases } ) );

# One test per table entry, plus the single $_ regression test below.
my $total = $basic_count + $i18n_count + 1;

# use Test::More tests => 77;
use Test::More;
plan tests => $total;

use Text::Capitalize 0.4 qw(scramble_case);
use Test::Locale::Utils qw(:all);

my $i18n_system = is_locale_international();

{
    # seeding with a known value, should get repeatable sequence from rand
    srand(666);

    # Note: need to sort the test cases, to get the same order that
    # was used in generating the answer key.
    foreach my $case ( sort keys %{ $basic_test_cases } ) {
        my $expected = $basic_test_cases->{ $case };
        my $result   = scramble_case( $case );
        record_testcase( $case, $result ) if $TESTCASEGEN;
        is( $result, $expected, "test: $case" );
    }

  SKIP:
    {
        # The i18n cases are only meaningful when uc/lc/ucfirst handle
        # the extended characters; otherwise skip them all in one go.
        skip "Can't test strings with international chars", $i18n_count
            unless $i18n_system;

        foreach my $case ( sort keys %{ $i18n_test_cases } ) {
            my $expected = $i18n_test_cases->{ $case };
            my $result   = scramble_case( $case );
            record_testcase( $case, $result ) if $TESTCASEGEN;
            is( $result, $expected, "test: $case" );
        }
    }
}

# Regression test: make sure $_ isn't munged by unlocalized use
{
    my $anything = "Whirl and Pieces";
    my $keeper   = "abc123";
    local $_ = $keeper;
    scramble_case($anything);

    # The description names the function actually exercised here
    # (it previously said capitalize_title).
    is( $_, $keeper, "\$\_ unaffected by scramble_case" );
}

#######
### end main, into the subs

# I need to have test case generator code embedded in this *.t
# A stand-alone script didn't work...
# Something odd about using srand for a repeatable rand sequence?
sub record_testcase { my $in = shift; my $out = shift; $in =~ s{'}{\\'}g; $out =~ s{'}{\\'}g; print {$TFH} " '$in' =>\n '$out',\n"; } # Hash of test cases (keys) and expected results (values) for # scramble_case, when seeded with a known value: srand(666) sub define_basic_test_cases { my %expect_scramble_case = ( '' => '', ' ...huh? ' => ' ...hUh? ', ' very spacey ' => ' vEry spACey ', '"but so!", sayeth I' => '"bUt so!", SAyEth I', '\'for not!\', he said.' => '\'FoR NOT!\', hE Said.', '-- ack, ack, bang!' => '-- ACk, ACk, bang!', '...and justice for all' => '...aNd JUsTICe fOr All', '...nor lost, nor found' => '...nOr losT, noR FOuNd', '10 Little-Endians' => '10 LIttle-ENDiANS', 'AWOL in the DMZ of WWIII' => 'Awol In THE dmZ OF WwiiI', 'Ah ha: and so forth' => 'Ah ha: AND SO forTh', 'And more. And still more.' => 'And moRE. And STIlL more.', 'And so they tramped on through the night. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp. Tramp...' => 'AnD So ThEY TRAmpEd on tHROUGh tHe NIghT. tramp. TramP. TramP. TRAMP. Tramp. TramP. trAMp...', 'And the rest is silence...' => 'And The Rest Is SIlEnce...', 'As I Ebb\'d with the Ocean of Life' => 'as i EBb\'D wITH ThE OCeaN of life', 'Ask not' => 'asK nOT', 'BEAT! BEAT! DRUMS!' => 'beAT! beAt! DRUMs!', 'Baron von Arnheim\'s revenge' => 'bARon vON aRNHEim\'s rEvEngE', 'DOODZ I AM SO THERE! NOT.' => 'doODz i Am so THEre! NOT.', 'Document. Test. Code. Repeat.' => 'DoCUmeNt. tESt. coDE. RePeat.', 'Erratic spacing: your KEY to creativity ' => 'eRRaTic spACiNg: youR keY tO CREATIvity ', 'From Pent-Up Aching Rivers' => 'frOm Pent-UP acHInG rIverS', 'Hell\'s Swells' => 'HeLL\'s SWells', 'In the beginning... was the global-set-key' => 'In tHE BeginNinG... wAs tHe GlobAL-SeT-Key', 'Mr. Wong and Dr. And Report' => 'Mr. wong aND dr. AnD rEPoRT', 'One\'s Self I Sing' => 'oNe\'S sElF i SiNG', 'Pain--has an Element of Blank' => 'Pain--HAS An eLemeNT of bLANK', 'Ping... ping... ping... pong!' => 'pINg... pINg... PInG... 
ponG!', 'Quinn Weaver, agent of SFPUG' => 'QuINN weavER, AGeNt of sFPUg', 'Scientific Study of the So-called Psychical Processes in the Higher Animals' => 'SciENtiFiC sTUDy oF THE sO-cAllEd PsYChiCal pRocESSes In tHE HIGHER AnImAls', 'TLAs i have known and loved' => 'TLas i HAVe kNOwn anD loVed', 'The 13 Clocks' => 'THe 13 clOCKs', 'The 4 False Weapons' => 'tHe 4 fAlsE weApons', 'The Next iMac: Just Another NeXt?' => 'the nexT ImAC: JUst AnotHer NexT?', 'The Running-Down of the Universe' => 'tHe RunNinG-Down OF ThE uniVERSE', 'The Wound-Dresser' => 'thE WoUNd-dRessER', 'The wind whispers "But!"' => 'thE WinD whISPers "bUT!"', 'Tis called perserverence in a good cause, and obstinacy in a bad one.' => 'Tis CAlLED persERveRenCE IN A GoOd CauSE, and oBStINaCY iN A BAD onE.', 'What about: a an the and or nor for but so yet not to of by at for but in, huh?' => 'wHaT AbOuT: A aN The AnD Or nOr FOR But sO yet NoT TO OF by aT FOR BuT In, HUh?', 'When I Heard the Learn\'d Astronomer' => 'wHEn I hEARD tHE LEarn\'d AstRonomER', 'Why? Well, why not?' => 'WHY? Well, WhY Not?', 'a brief history of the word of' => 'a brIeF HIStORY Of THe wORd of', 'a history of n.a.s.a.' => 'A history OF N.A.S.A.', 'a laboratory of the open fields' => 'a lAbOratOry OF THE open fIELds', 'a theory I have' => 'A thEORY I havE', 'and/or testified it shall be' => 'anD/Or TeStifieD it sHALl Be', 'chords against culture -- counter-sexist themes in the later works of Fetal Tissue Kleenex' => 'cHoRDS aGAINSt culTurE -- cOUNtEr-SExIsT themEs In tHE lATer Works of FETAL TIsSuE KLEenEx', 'forget gilroy, A. Snakhausem was here' => 'ForgEt GilrOy, a. sNakhaUSeM WAS herE', 'hey doc the ticker is hocked, the dial is locked, the face is botoxed, whazzup?' => 'hey DOC THe TIckEr is hocked, THE DiAL is lOckeD, The face IS Botoxed, WhazZuP?', 'history of the gort-verada-nictu moving company' => 'histOrY Of thE goRt-VeRaDa-NICtU MoViNG CompANy', 'how should one read a book?' 
=> 'hOW shouLD ONe ReaD A bOoK?', 'ice9count0' => 'Ice9cOUNt0', 'it came from texas: the new new world order?' => 'It cAMe fRoM TEXAS: thE New NEW WoRlD oRdeR?', 'it\'s the man\'s, you know?' => 'it\'S ThE Man\'S, you KNOW?', 'kill \'em all' => 'KilL \'em ALl', 'machine13' => 'macHiNe13', 'mo\' beta-testing' => 'MO\' beta-TeStiNg', 'of beauty' => 'oF BEAutY', 'on style' => 'On stYle', 'pOiksIFiciZaLaTIonoRyISM' => 'pOiKsIfiCIzaLaTiOnoRyiSM', 's.a.d. days t.a.n. shades' => 's.A.d. DaYs T.a.N. shadEs', 'sarcasm yet not humor' => 'sARCasm YEt noT hUMor', 'sarcasm, yet' => 'sARCAsm, yeT', 'say "but!", say what?' => 'saY "but!", SAy WhAt?', 'the dirty 27' => 'thE dirTY 27', 'the end of the dream: three-holed button manufacture in a four-holed world' => 'tHE eNd of ThE drEAm: THree-holeD BUtTON MANUFAcTUre In A FoUR-HoLeD WorLD', 'the n.a.s.a. sucks rag' => 'ThE N.a.s.A. SucKs RAg', 'yet by and by but in for to' => 'yEt bY AND By BUt in For tO', 'you\'re wrong, it doesn\'t fly, it\'s not there and they\'re lost, so you\'d better not' => 'yoU\'RE WronG, IT DoesN\'t flY, IT\'S nOt tHEre and THEY\'RE LOST, sO You\'d betTeR NoT', ); return \%expect_scramble_case; }; sub define_basic_test_cases_i18n { my %expect_scramble_case = ( 'Didaktische Überlegungen/Erfahrungsbericht über den Computereinsatz im geisteswissenschaftlichen Unterricht am Bsp. "Historische Zeitung"' => 'dIdaKtISchE ÜbERLeGUngEN/ErFahrUnGsbERIcht üBer DeN CoMpUtEREInsATz im GeIsTesWissENscHAFTlIchEN unTERRicHt AM BSP. "hIStoRIsChe zEITuNg"', 'Explicación dél significado de los términos utilizados en "Don Quijote", por capítulo.' 
=> 'exPlIcAciÓn dÉL SIgniFIcAdo De loS tÉRmiNoS UtiLiZadoS en "don QUIJoTE", pOr CAPÍtULO.', 'où l\'on découvre une époque à travers l\'oeuvre imposante d\'Honoré de Balzac' => 'oÙ l\'on déCouVRE uNE ÉPOQUe à traVErs l\'OeuvRE iMPOSAnte d\'hONorÉ De bAlzac', 'évêque, qu\'il eût aimé voir infliger à ceux qui ont abdiqué, J\'ai été reçu, and pepe le peau' => 'évÊQue, Qu\'iL eûT AIMÉ vOiR inflIgER à ceux quI onT AbdIQué, J\'aI éTé REçU, and pepe lE peaU', 'über maus' => 'übeR mAuS', ); return \%expect_scramble_case; } Text-Capitalize-1.5/t/lib000755001750001750 013543270756 14472 5ustar00doomdoom000000000000Text-Capitalize-1.5/t/lib/Test000755001750001750 013543270756 15411 5ustar00doomdoom000000000000Text-Capitalize-1.5/t/lib/Test/Locale000755001750001750 013543270756 16610 5ustar00doomdoom000000000000Text-Capitalize-1.5/t/lib/Test/Locale/Utils.pm000444001750001750 2242613543270756 20431 0ustar00doomdoom000000000000package Test::Locale::Utils; # doom@kzsu.stanford.edu # 29 Jan 2006 =head1 NAME Test::Locale::Utils - utilities for writing tests involving international characters =head1 SYNOPSIS use Test::More; use Test::Locale::Utils qw( is_locale_international ); my $i18n_test_cases = { 'über maus' => 'Über Maus', 'l\'oeuvre imposante d\'Honoré de Balzac' => 'L\'Oeuvre Imposante d\'Honoré de Balzac', } my $i18n_test_count = scalar( keys( %{ $i18n_test_cases } ) ); my $i18n_system = is_locale_international(); SKIP: { skip "Can't test strings with international chars", $i18n_count, unless $i18n_system; foreach my $case (sort keys %{ $i18n_test_cases }) { my $expected = $i18n_test_cases->{ $case }; my $result = capitalize_title( $case ); is ($result, $expected, "Testing: $case"); } } # Older style (deprecated): use Test::More; use Test::Locale::Utils qw(:all); my @exchars = extract_extended_chars(\@strings); my $internat = internationalized_locale(@exchars); # Deprecated my $exchars_str = join '', @exchars; my $exchars_rule = qr{[$exchars_str]}; foreach my $string 
(@strings) { SKIP: { skip "This locale can't deal with i18n chars in string: $string", 1, unless ($internat && ($string =~ /$exchars_rule/) ); is( $expected{$string}, string_transformation($string), "Testing $string" ); } } =head1 DESCRIPTION A small collection of utility functions to make it easier to write tests that work with strings that may contain characters beyond the 7bit ASCII range (e.g. the "extended characters" or "international characters" of iso9959-1 and friends). =head1 EXPORTED Nothing by default. All of the following are exportable on request (and all may be requested with the ":all" tag). =over =cut use 5.006; use strict; use warnings; # use locale; use utf8; use Carp; use Data::Dumper; # use List::MoreUtils qw( all ); # Not in core, so writing my own "all" my $DEBUG = 0; require Exporter; use vars qw( @ISA %EXPORT_TAGS @EXPORT @EXPORT_OK $VERSION); @ISA = qw(Exporter); %EXPORT_TAGS = ( 'all' => [ qw( extract_extended_chars internationalized_locale is_locale_international define_sample_i18n_chars all_true is_uc_and_lc_internationalized is_ucfirst_internationalized ) ] ); @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); @EXPORT = qw( ); $VERSION = '0.01'; =item extract_extended_chars Given a reference to an array of strings, returns a list of all extended characters (i.e. characters with the eigth-bit set) that have appeared at least once in the strings. 
=cut

# Takes an array reference of strings; returns the sorted list of distinct
# characters outside the \x00-\x7F range that occur in any of them.
sub extract_extended_chars {
    my $aref = shift;
    my $sevenbit_rule = qr{[\x00-\x7F]};
    my %seen;
    foreach my $string ( @{$aref} ) {
        # Strip every seven-bit character; what remains is "extended".
        ( my $residue = $string ) =~ s/$sevenbit_rule//g;
        my @chars = split //, $residue;
        @seen{@chars} = ();    # mark these chars as seen; only the keys matter
    }
    my @exchars = sort keys %seen;
    return @exchars;
}

=item is_locale_international

Does some crude checks of uc, lc, and ucfirst to see if they
handle some international characters (latin-1) correctly, or at
least well enough that we can expect the international character
test cases of Text::Capitalize to have meaningful results.

=cut

# Returns 1 only if both the uc/lc check and the ucfirst check pass,
# otherwise 0.
sub is_locale_international {
    my $result = all_true(
        [   is_uc_and_lc_internationalized(),
            is_ucfirst_internationalized(),
        ]
    );
    return $result;
}

=item is_uc_and_lc_internationalized

Looks at the behavior of uc and lc for a small sample of
"international characters": this simply checks if the extended
characters of latin-1 and friends have an upper and lower form
defined as expected.

=cut

# Returns 1 if, for every sample pair, uc() of the lowercase form gives the
# uppercase form and lc() of the uppercase form gives the lowercase form;
# returns 0 otherwise.
sub is_uc_and_lc_internationalized {
    my $exchars = define_sample_i18n_chars();
    # use locale;
    # use utf8;

    # One flag per sample pair, in the order the pairs were defined.
    my @checks;
    foreach my $pair ( @{$exchars} ) {
        my ( $lower, $upper ) = @{$pair};
        my $as_expected = ( uc($lower) eq $upper ) && ( lc($upper) eq $lower );
        push @checks, ( $as_expected ? 1 : 0 );
    }

    # join() is parenthesized so the newline is printed after the joined
    # flags instead of being joined in as one more list element.
    print STDERR "is_uc_and_lc_internationalized: char status: ",
        join( " ", @checks ), "\n"
        if ($DEBUG);

    my $okay = all_true( \@checks );
    return $okay;
}

=item define_sample_i18n_chars

Returns a short list of pairs of extended characters, pairing a
lowercase form with an uppercase one (an aref of arefs).

These were selected because they're the only extended characters
in use in the test cases for L<Text::Capitalize>.
=cut

# Builds a fresh array of [ lowercase, uppercase ] character pairs and
# returns a reference to it.
sub define_sample_i18n_chars {
    use utf8;
    my @lower_forms = ( 'ü', 'é', 'í', 'ó' );
    my @upper_forms = ( 'Ü', 'É', 'Í', 'Ó' );

    my @pairs = map { [ $lower_forms[$_], $upper_forms[$_] ] } 0 .. $#lower_forms;
    return \@pairs;
}

=item is_ucfirst_internationalized

A very specific test to see if ucfirst can upcase German's "over".
If it can, we assume ucfirst is working on the kind of international
characters used in the Text::Capitalize tests.

Motivation: Solaris boxes apparently have a knack for getting uc
and lc to work on international characters, but still leaving
ucfirst broken -- it upcases the character *after* a leading
international character (such as a latin-1 u-umlaut):

  Text-Capitalize-0.8:
  - i86pc-solaris-thread-multi / 5.8.8:
    - FAIL http://nntp.x.perl.org/group/perl.cpan.testers/5882611
  - sun4-solaris-64int / 5.8.4:
    - FAIL http://nntp.x.perl.org/group/perl.cpan.testers/5846995

=cut

# Returns 1 when ucfirst() turns the sample word into its expected
# capitalized form, 0 otherwise.
sub is_ucfirst_internationalized {
    use utf8;
    my $sample      = 'über';
    my $capitalized = 'Über';

    return 1 if ucfirst($sample) eq $capitalized;
    return 0;
}

=item all_true

Example usage:

  my $okay = all_true( \@checks );

This is an alternative to List::MoreUtils "all", written to avoid
a non-core dependency for the L<Text::Capitalize> tests.

Note: If you'd rather use that more common module, do this:

  use List::MoreUtils qw( all );
  my $okay = all { ($_) } @checks;

=cut

# Takes an array reference; returns 0 at the first false element,
# and 1 if there is none (including when the array is empty).
sub all_true {
    my $aref = shift;
    for my $item ( @{$aref} ) {
        return 0 if !$item;
    }
    return 1;
}

=item internationalized_locale

DEPRECATED.  Use L</is_locale_international> instead.

Given an array of extended characters that you care about, this
code will check to make sure that the current locale seems to
comprehend what to do with them.  Specifically, it checks to see
if they have a defined upper and lower case.

This is an excessively simple version that just looks at the
extended characters to see if they change case when run through
either uc or lc.  This apparently fails for some locales, e.g.
Russian, where the extended chars are in the same locations as in
iso8859, but the upper and lower have reversed positions.

=cut

# Takes a list of characters; returns 0 if uc() and lc() give the same
# result for any of them, 1 otherwise.  Every character is examined (no
# early exit), so with $DEBUG set each offender gets its own warning.
sub internationalized_locale {
    my @exchars = @_;
    my $okay = 1;
    for my $ex (@exchars) {
        # Distinct upper and lower forms: this character is fine.
        next unless uc($ex) eq lc($ex);

        warn "For this locale, uc & lc act strangely on $ex\n" if $DEBUG;
        $okay = 0;
    }
    return $okay;
}

1;

__END__

=back

=head1 DISCUSSION

The "use locale" story seems to have some notable gaps.
A brief summary, off the top of my head:

There's no definitive way to get a listing of all available
locales on a system.  The right way to do it varies from platform
to platform.

There's no definitive way of finding out what platform you're on:
You can check C<$^O>, but you need to parse it yourself (and that's not
as easy as you might think: matching for /win/ to see if you're
on a windows platform will get confused in cases like "cygwin").
There's no definitive list of all possible values of C<$^O>.

There are some useful tricks in the POSIX module that can help
with these issues, but you can't count on every system that perl
runs on being POSIX compliant (and like I just said, checking
what kind of platform you're on is a little trickier than you'd
think).

And a recent discovery of mine: when the locale is utf-8, doing a
"use locale" does not give you "unicode semantics", you actually
have to do "utf8::upgrade" on anything you want "uc" and friends
to work on.  Heigh-ho.

This little module is an attempt at cutting the Gordian Knot
represented by this cluster of problems, at least as far as
the automated tests for L<Text::Capitalize> are concerned.

Since it's difficult to determine the Right Way to do
cross-platform checks of string handling including international
characters, instead I use some simple operational tests to see if
the system does what's expected with the international
characters, and if not, the tests using those characters will be
skipped.
=head1 SEE ALSO L =head1 AUTHOR Joseph Brenner, Edoom@kzsu.stanford.eduE =head1 COPYRIGHT AND LICENSE Copyright (C) 2006 by Joseph Brenner This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.2 or, at your option, any later version of Perl 5 you may have available. =head1 BUGS None reported... yet. =cut # Local Variables: # coding: utf-8-unix # End: Text-Capitalize-1.5/t/lib/Test/Locale/t000755001750001750 013543270756 17053 5ustar00doomdoom000000000000Text-Capitalize-1.5/t/lib/Test/Locale/t/000-all_true-Test-Locale-Utils-private.t000444001750001750 345313543270756 26366 0ustar00doomdoom000000000000# Test file created outside of h2xs framework. # Run this like so: `perl Test-Locale-Utils.t' # doom@kzsu.stanford.edu 2009/03/13 20:11:32 use warnings; use strict; $|=1; my $DEBUG = 1; # TODO set to 0 before ship use Data::Dumper; use Test::More; BEGIN { plan tests => 7 }; # TODO change to 'tests => last_test_to_print'; use FindBin qw( $Bin ); # ~/End/Cave/CapitalizeTitle/Wall/Text/Capitalize/t/lib/Test/Locale/t/ use lib ("$Bin/../../.."); # TODO I'm "use"ing this twice... 
# Confirm the module loads at all before anything else is attempted.
BEGIN {
    use_ok('Test::Locale::Utils');
}

ok( 1, "Traditional: If we made it this far, we're ok." );

use Test::Locale::Utils qw(:all);

# Case 1: every element is true, so all_true should report 1.
{
    my $label  = "Testing all_true: Array of true items.";
    my @truthy = ( 1, '1', 'hey there', defined(' '), 'hard times' );

    my $got = all_true( \@truthy );
    is( $got, 1, $label );
}

# Case 2: one false element of each flavor is planted in an otherwise
# true list; all_true should report 0 every time.
{
    my $label = "Testing all_true: Array of items, one *not* true";

    # [ description used in the test name, the false value to plant ]
    my @false_cases = (
        [ 'zero',         0 ],
        [ 'empty string', '' ],
        [ 'quoted zero',  '0' ],
        [ 'undef',        undef ],
    );

    for my $case (@false_cases) {
        my ( $kind, $falsy ) = @{$case};
        my @items = ( 1, '1', $falsy, 'hard times' );

        my $got = all_true( \@items );
        is( $got, 0, "$label: $kind" );
    }
}