Data-Record-0.02/0000755000076500007650000000000010313561670015610 5ustar curtispoecurtispoe00000000000000Data-Record-0.02/Build.PL0000644000076500007650000000125310313561670017105 0ustar curtispoecurtispoe00000000000000#!/usr/bin/perl -w
use strict;
use Module::Build;
my $builder = Module::Build->new(
module_name => 'Data::Record',
license => 'perl',
dist_author => 'Curtis "Ovid" Poe ',
dist_version_from => 'lib/Data/Record.pm',
requires => {
# No, I don't really require Sub::Uplevel, but I'm tired of my
# tests failing because of this.
'Sub::Uplevel' => 0.09,
},
build_requires => {
'Test::Exception' => 0.21,
'Test::More' => 0.6,
},
add_to_cleanup => ['Data-Record-*'],
create_makefile_pl => 'traditional',
);
$builder->create_build_script();
Data-Record-0.02/Changes0000644000076500007650000000024210313561670017101 0ustar curtispoecurtispoe00000000000000Revision history for Data-Record
0.02 2005/09/19
Fixed two POD errors.
0.01 2005/09/18
First version, released on an unsuspecting world.
Data-Record-0.02/lib/0000755000076500007650000000000010313561670016356 5ustar curtispoecurtispoe00000000000000Data-Record-0.02/lib/Data/0000755000076500007650000000000010313561670017227 5ustar curtispoecurtispoe00000000000000Data-Record-0.02/lib/Data/Record.pm0000644000076500007650000003046510313561670021013 0ustar curtispoecurtispoe00000000000000package Data::Record;
use warnings;
use strict;
=head1 NAME
Data::Record - "split" on steroids
=head1 VERSION
Version 0.02
=cut
our $VERSION = '0.02';
use constant NOT_FOUND => -1;
use constant ALL_RECORDS => -1;
use constant TRIM_RECORDS => 0;
=head1 SYNOPSIS
use Regexp::Common;
use Data::Record;
my $record = Data::Record->new({
split => "\n",
unless => $RE{quoted},
});
my @data = $record->records($data);
=head1 DESCRIPTION
Sometimes we need data split into records and a simple split on the input
record separator (C<$/>) or some other value fails because the values we're
splitting on may allowed in other parts of the data. Perhaps they're quoted.
Perhaps they're embedded in other data which should not be split up.
This module allows you to specify what you wish to split the data on, but also
speficy an "unless" regular expression. If the text in question matches the
"unless" regex, it will not be split there. This allows us to do things like
split on newlines unless newlines are embedded in quotes.
=head1 METHODS
=head2 new
Common usage:
my $record = Data::Record->new({
split => qr/$split/,
unless => qr/$unless/,
});
Advanced usage:
my $record = Data::Record->new({
split => qr/$split/,
unless => qr/$unless/, # optional
token => $token, # optional
chomp => 0, # optional
limit => $limit, # optional (do not use with trim)
trim => 1, # optional (do not use with limit)
fields => {
split => ',',
unless => $RE{quoted}, # from Regexp::Common
}
});
The constructor takes a hashref of key/value pairs to set the behavior of data
records to be created.
=over 4
=item * split
This is the value to split the data on. It may be either a regular expression
or a string.
Defaults to the current input record separator (C<$/>).
=item * unless
Data will be split into records matching the split value I they also
match this value. No default.
If you do not have an C value, use of this module is overkill.
=item * token
You will probably never need to set this value.
Internally, this module attempts to find a token which does not match any text
found in the data to be split and also does not match the split value. This is
necessary because we mask the data we don't want to split using this token.
This allows us to split the resulting text.
In the unlikely event that the module cannot find a token which is not in the
text, you may set the token value yourself to some string value. Do not set it
to a regular expression.
=item * chomp
By default, the split value is discarded (chomped) from each record. Set this
to a true value to keep the split value on each record. This differs slightly
from how it's done with split and capturing parentheses:
split /(\,)/, '3,4,5';
Ordinarily, this results in the following list:
( 3, ',', 4, ',', 5 )
This module assumes you want those values I the preceding record. By
setting chomp to false, you get the following list:
( '3,', '4,' 5 )
=item * limit
The default split behavior is similar to this:
split $split_regex, $data;
Setting C will cause the behavior to act like this:
split $split_regex, $data, $limit
See C for more information about the behavior of C.
You may not set both C and C in the constructor.
=item * trim
By default, we return all records. This means that due to the nature of split
and how we're doing things, we sometimes get a trailing null record. However,
setting this value causes the module to behave as if we had done this:
split $split_regex, $data, 0;
When C is called with a zero as the third argument, trailing null values
are discarded. See C for more information.
You may not set both C and C in the constructor.
B: This does I trim white space around returned records.
=item * fields
By default, individual records are returned as strings. If you set C,
you pass in a hashref of arguments that are identical to what C would take
and resulting records are returned as array references processed by a new
C instance.
Example: a quick CSV parser which assumes that commas and newlines may both be
in quotes:
# four lines, but there are only three records! (newline in quotes)
$data = <<'END_DATA';
1,2,"programmer, perl",4,5
1,2,"programmer,
perl",4,5
1,2,3,4,5
END_DATA
$record = $RECORD->new({
split => "\n",
unless => $quoted,
trim => 1,
fields => {
split => ",",
unless => $quoted,
}
});
my @records = $record->records($data);
foreach my $fields (@records) {
foreach my $field = (@$fields);
# do something
}
}
Note that above example will not remove the quotes from individual fields.
=back
=cut
sub new {
my ( $class, $value_of ) = @_;
my %value_of = %$value_of;
# XXX fix this later after we have the core working
my $self = bless {}, $class;
unless ( exists $value_of{split} ) {
$value_of{split} = $/;
}
$self->split( $value_of{split} )->unless( $value_of{unless} )
->chomp( exists $value_of{chomp} ? $value_of{chomp} : 1 )
->limit( exists $value_of{limit} ? $value_of{limit} : ALL_RECORDS );
$self->token( $value_of{token} ) if exists $value_of{token};
if ( exists $value_of{trim} ) {
$self->_croak("You may not specify 'trim' if 'limit' is specified")
if exists $value_of{limit};
$self->trim(1);
}
$self->_fields( $value_of{fields} ) if exists $value_of{fields};
return $self;
}
##############################################################################
=head2 split
my $split = $record->split;
$record->split($on_value);
Getter/setter for split value. May be a regular expression or a scalar value.
=cut
sub split {
my $self = shift;
return $self->{split} unless @_;
my $split = shift;
$split = qr/\Q$split\E/ unless 'Regexp' eq ref $split;
$self->{split} = $split;
return $self;
}
##############################################################################
=head2 unless
my $unless = $self->unless;
$self->unless($is_value);
Getter/setter for unless value. May be a regular expression or a scalar value.
=cut
sub unless {
my $self = shift;
return $self->{unless} unless @_;
my $unless = shift;
$unless = '' unless defined $unless;
$unless = qr/\Q$unless\E/
unless 'Regexp' eq ref $unless
|| 'Regexp::Common' eq ref $unless;
$self->{unless} = $unless;
return $self;
}
##############################################################################
=head2 chomp
my $chomp = $record->chomp;
$record->chomp(0);
Getter/setter for boolean chomp value.
=cut
sub chomp {
my $self = shift;
return $self->{chomp} unless @_;
$self->{chomp} = shift;
return $self;
}
##############################################################################
=head2 limit
my $limit = $record->limit;
$record->limit(3);
Getter/setter for integer limit value.
=cut
sub limit {
my $self = shift;
return $self->{limit} unless @_;
my $limit = shift;
unless ( $limit =~ /^-?\d+$/ ) {
$self->_croak("limit must be an integer value, not ($limit)");
}
$self->{limit} = $limit;
return $self;
}
##############################################################################
=head2 trim
my $trim = $record->trim;
$record->trim(1);
Getter/setter for boolean limit value. Setting this value will cause any
previous C value to be overwritten.
=cut
sub trim {
my $self = shift;
return $self->{trim} unless @_;
my $limit = shift;
$self->{limit} = $limit ? TRIM_RECORDS : ALL_RECORDS;
}
##############################################################################
=head2 token
my $token = $record->token;
$record->token($string_not_found_in_text);
Getter/setter for token value. Token must be a string that does not match the
split value and is not found in the text.
You can return the current token value if you have set it in your code. If you
rely on this module to create a token (this is the normal behavior), it is not
available via this method until C is called.
Setting the token to an undefined value causes L to try and find
a token itself.
If the token matches the split value, this method will croak when you attempt
to set the token.
If the token is found in the data, the C method will croak when it is
called.
=cut
sub token {
my $self = shift;
return $self->{token} unless @_;
my $token = shift;
if ( defined $token ) {
if ( $token =~ $self->split ) {
$self->_croak(
"Token ($token) must not match the split value (@{[$self->split]})"
);
}
}
$self->{token} = $token;
return $self;
}
##############################################################################
=head2 records
my @records = $record->records($data);
Returns C<@records> for C<$data> based upon current split criteria.
=cut
sub records {
my ( $self, $data ) = @_;
my $token = $self->_create_token($data);
my @values;
if ( defined( my $unless = $self->unless ) ) {
my $index = 0;
$data =~ s{($unless)}
{
$values[$index] = $1;
$token . $index++ . $token;
}gex;
#main::diag($data);
}
my $split = $self->split;
$split = $self->chomp ? $split : qr/($split)/;
# if they have a numeric split value, we don't want to split tokens
my $token_re = qr/\Q$token\E/;
$split = qr/(?limit;
unless ( $self->chomp ) {
my @new_records;
while ( defined( my $record = shift @records ) ) {
if (@records) {
$record = join '', $record, shift @records;
}
push @new_records, $record;
}
@records = @new_records;
}
foreach my $record (@records) {
unless ( NOT_FOUND eq index $record, $token ) {
$record =~ s{$token_re(\d+)$token_re}{$values[$1]}gex;
}
}
if ( my $field = $self->_fields ) {
$_ = [ $field->records($_) ] foreach @records;
}
return @records;
}
sub _fields {
my $self = shift;
return $self->{fields} unless @_;
my $fields = ref($self)->new(shift);
if ( defined( my $token = $self->token ) ) {
$fields->token($token);
}
$self->{fields} = $fields;
return $self;
}
my @tokens = map { $_ x 6 } qw( ~ ` ? " { } ! @ $ % ^ & * - _ + = );
sub _create_token {
my ( $self, $data ) = @_;
my $token;
if ( defined( $token = $self->token ) ) {
$self->_croak("Current token ($token) found in data")
unless NOT_FOUND eq index $data, $token;
}
foreach my $curr_token (@tokens) {
if ( NOT_FOUND eq index $data, $curr_token ) {
$token = $curr_token;
$self->token($token);
last;
}
}
if ( defined $token ) {
return $token;
}
my $tried = join ", ", @tokens;
$self->_croak(
"Could not determine a unique token for data. Tried ($tried)");
}
sub _croak {
my ( $self, $message ) = @_;
require Carp;
Carp::croak($message);
}
=head1 BUGS
It's possible to get erroneous results if the split value is C. I've
tried to work around this. Please let me know if there is a problem.
=head1 CAVEATS
This module must read I of the data at once. This can make it slow for
larger data sets.
=head1 AUTHOR
Curtis "Ovid" Poe, C<< >>
=head1 BUGS
Please report any bugs or feature requests to
C, or through the web interface at
L.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.
=head1 ACKNOWLEDGEMENTS
Thanks to the Monks for inspiration from
L.
0.02 Thanks to Smylers and Stefano Rodighiero for catching POD errors.
=head1 COPYRIGHT & LICENSE
Copyright 2005 Curtis "Ovid" Poe, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut
1; # End of Data::Record
Data-Record-0.02/Makefile.PL0000644000076500007650000000056210313561670017565 0ustar curtispoecurtispoe00000000000000# Note: this file was auto-generated by Module::Build::Compat version 0.03
use ExtUtils::MakeMaker;
WriteMakefile
(
'PL_FILES' => {},
'INSTALLDIRS' => 'site',
'NAME' => 'Data::Record',
'EXE_FILES' => [],
'VERSION_FROM' => 'lib/Data/Record.pm',
'PREREQ_PM' => {
'Test::More' => '0.6',
'Test::Exception' => '0.21',
'Sub::Uplevel' => '0.09'
}
)
;
Data-Record-0.02/MANIFEST0000644000076500007650000000023410313561670016740 0ustar curtispoecurtispoe00000000000000Build.PL
Changes
MANIFEST
Makefile.PL
META.yml # Will be created by "make dist"
README
lib/Data/Record.pm
t/00-load.t
t/10record.t
t/pod-coverage.t
t/pod.t
Data-Record-0.02/META.yml0000644000076500007650000000061510313561670017063 0ustar curtispoecurtispoe00000000000000---
name: Data-Record
version: 0.02
author:
- Curtis "Ovid" Poe
abstract: '"split" on steroids'
license: perl
resources:
license: http://dev.perl.org/licenses/
requires:
Sub::Uplevel: 0.09
build_requires:
Test::Exception: 0.21
Test::More: 0.6
provides:
Data::Record:
file: lib/Data/Record.pm
version: 0.02
generated_by: Module::Build version 0.2702
Data-Record-0.02/README0000644000076500007650000000047710313561670016500 0ustar curtispoecurtispoe00000000000000Data-Record
INSTALLATION
To install this module, run the following commands:
perl Build.PL
./Build
./Build test
./Build install
COPYRIGHT AND LICENCE
Copyright (C) 2005 Curtis "Ovid" Poe
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
Data-Record-0.02/t/0000755000076500007650000000000010313561670016053 5ustar curtispoecurtispoe00000000000000Data-Record-0.02/t/00-load.t0000644000076500007650000000022210313561670017370 0ustar curtispoecurtispoe00000000000000#!perl -T
use Test::More tests => 1;
BEGIN {
use_ok( 'Data::Record' );
}
diag( "Testing Data::Record $Data::Record::VERSION, Perl $], $^X" );
Data-Record-0.02/t/10record.t0000644000076500007650000001055310313561670017663 0ustar curtispoecurtispoe00000000000000#!/usr/bin/perl -w
use strict;
use Test::More qw/no_plan/;
use Test::Exception;
use Data::Dumper;
my $RECORD;
BEGIN {
chdir 't' if -d 't';
unshift @INC => '../lib';
$RECORD = "Data::Record";
use_ok $RECORD or die "Cannot use $RECORD";
}
# gleefully stoled from Regexp::Common
my $quoted = qr/(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\')|(?:\`)(?:[^\\\`]*(?:\\.[^\\\`]*)*)(?:\`))/;
can_ok $RECORD, 'new';
throws_ok { $RECORD->new({limit => 13, trim => 0}) }
qr/^You may not specify 'trim' if 'limit' is specified/,
'new() should croak if both limit and trim are specified';
my %attributes = (
split => '.',
unless => $quoted,
);
ok my $record = $RECORD->new(\%attributes) , '... and calling it should suceed';
isa_ok $record, $RECORD, '... and the object it returns';
my @expected = (
'loves("Mr. Poe", prolog)',
'loves(ovid, perl)',
'eq(ovid, "Mr. Poe")',
);
my $data = join '.', @expected;
$data .= '.';
can_ok $record, 'records';
ok my @records = $record->records($data), '... and it should return records';
is_deeply \@records, [@expected, ''], '... and they should be the correct ones';
can_ok $record, 'limit';
is $record->limit, -1, '... initial limit should pull all records';
throws_ok { $record->limit('none') }
qr/^limit must be an integer value, not \(none\)/,
'... but setting it a non-numeric value should fail';
throws_ok { $record->limit(3.2) }
qr/^limit must be an integer value, not \(3.2\)/,
'... and setting it a numeric non-integer value should fail';
$record->limit(0);
ok @records = $record->records($data), '... and it should return records';
is_deeply \@records, \@expected, '... and they should be the correct ones';
can_ok $record, 'chomp';
is $record->chomp, 1, '... and it should have a true default value';
$_ .= '.' foreach @expected;
$record->chomp(0);
ok @records = $record->records($data), '... and it should return records';
is_deeply \@records, \@expected, '... and they should be the correct ones';
$data = <<'END_DATA';
loves("Mr.
Poe", Language):-
not(eq(Language, java)).
loves(ovid, perl).
eq(ovid, "Mr. Poe").
END_DATA
@expected = (
qq'loves("Mr.\nPoe", Language):-\n not(eq(Language, java)).\n',
"loves(ovid, perl).\n",
qq'eq(ovid, "Mr. Poe").\n',
);
$record->split(".\n")
->unless($quoted)
->chomp(0)
->trim(1);
@records = $record->records($data);
is_deeply \@records, \@expected,
'We should be able to keep the split value and trim trailing nulls';
can_ok $record, 'token';
my $token = $record->token;
ok +(-1 eq index $data, $token),
'... and the token should not be present in the data';
ok $record->token('XXX'),
'... setting the token to a value that does not match split should succeed';
@records = $record->records($data);
is_deeply \@records, \@expected,
'... and the result of $record->records should be unchanged';
throws_ok { $record->token(".\n") }
qr/Token \(\.\n\) must not match the split value.*/,
'... but it should fail if it matches the split value';
ok $record->token('ovid'),
'We should be able to set the token to a value in our target text';
throws_ok { $record->records($data) }
qr/Current token \(ovid\) found in data/,
'... but calling records should then croak()';
$data = join "\n", map { $_ x 6 } qw( ~ ` ? " { } ! @ $ % ^ & * - _ + = );
$record->token(undef);
throws_ok { $record->records($data) }
qr/^Could not determine a unique token for data.*/,
'Calling records() should fail if we cannot determine a unique token';
$data = 'xx33yyy999zzz0aaa2bbb';
$record = $RECORD->new({
split => qr/\d+/,
unless => '999'
});
@records = $record->records($data);
@expected = (
'xx',
'yyy999zzz',
'aaa',
'bbb'
);
is_deeply \@records, \@expected,
'We should be able to correctly split records even if their split is numeric';
$data = <<'END_DATA';
1,2,"programmer, perl",4,5
1,2,"programmer,
perl",4,5
1,2,3,4,5
END_DATA
$record = $RECORD->new({
split => "\n",
unless => $quoted,
trim => 1,
fields => {
split => ",",
unless => $quoted,
}
});
@records = $record->records($data);
@expected = (
[ 1, 2, '"programmer, perl"', 4, 5 ],
[ 1, 2, qq'"programmer,\nperl"', 4, 5 ],
[ 1, 2, 3, 4, 5 ],
);
is_deeply \@records, \@expected,
'Specifiying how you want your fields created should succeed';
Data-Record-0.02/t/pod-coverage.t0000644000076500007650000000025410313561670020614 0ustar curtispoecurtispoe00000000000000#!perl -T
use Test::More;
eval "use Test::Pod::Coverage 1.04";
plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
all_pod_coverage_ok();
Data-Record-0.02/t/pod.t0000644000076500007650000000021410313561670017017 0ustar curtispoecurtispoe00000000000000#!perl -T
use Test::More;
eval "use Test::Pod 1.14";
plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
all_pod_files_ok();