Text-BibTeX-Validate-0.3.0/0000755000175000017500000000000014013404076015173 5ustar andriusandriusText-BibTeX-Validate-0.3.0/scripts/0000755000175000017500000000000014013404076016662 5ustar andriusandriusText-BibTeX-Validate-0.3.0/scripts/bibtex-validate0000755000175000017500000000150614013404076021656 0ustar andriusandrius
#!/usr/bin/perl

# bibtex-validate: runs validate_BibTeX() on every entry of every file
# named on the command line and emits each validation message via warn().

use strict;
use warnings;

use Scalar::Util qw( blessed );
use Text::BibTeX;
use Text::BibTeX::Validate qw( validate_BibTeX );
use Text::BibTeX::Validate::Warning;

# With no arguments, a single '-' is used as the file name.
push @ARGV, '-' if !@ARGV;

for my $filename (@ARGV) {
    my $bibfile = Text::BibTeX::File->new( $filename );
    die "$filename: $!\n" if !$bibfile;

    while( my $entry = Text::BibTeX::Entry->new( $bibfile ) ) {
        for my $warning (validate_BibTeX( $entry )) {
            my $is_structured =
                blessed( $warning ) &&
                $warning->isa( 'Text::BibTeX::Validate::Warning' );

            # Anything that is not a warning object is printed with a
            # hand-made "file: key:" prefix.
            if( !$is_structured ) {
                warn "$0: $filename: $entry->{key}: $warning";
                next;
            }

            # Warning objects build the "file: key: field:" prefix
            # themselves when stringified, so only the location has to
            # be filled in here.
            $warning->{file} = $filename;
            $warning->{key}  = $entry->{key};
            warn "$0: $warning\n";
        }
    }
}
Text-BibTeX-Validate-0.3.0/README0000644000175000017500000000047014013404076016054 0ustar andriusandrius This archive contains the distribution Text-BibTeX-Validate, version 0.3.0: validator for BibTeX format This software is Copyright (c) 2021 by Andrius Merkys. This is free software, licensed under: The (three-clause) BSD License This README file was generated by Dist::Zilla::Plugin::Readme v6.010.
Text-BibTeX-Validate-0.3.0/Makefile.PL0000644000175000017500000000250214013404076017144 0ustar andriusandrius
# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.010.
use strict;
use warnings;

use ExtUtils::MakeMaker;

my %WriteMakefileArgs = (
  "ABSTRACT" => "validator for BibTeX format",
  "AUTHOR" => "Andrius Merkys ",
  "CONFIGURE_REQUIRES" => {
    "ExtUtils::MakeMaker" => 0
  },
  "DISTNAME" => "Text-BibTeX-Validate",
  "LICENSE" => "bsd",
  "NAME" => "Text::BibTeX::Validate",
  "PREREQ_PM" => {
    "Algorithm::CheckDigits" => 0,
    "Data::Validate::Email" => 0,
    "Data::Validate::URI" => 0,
    "Scalar::Util" => 0,
    "Text::sprintfn" => 0
  },
  "TEST_REQUIRES" => {
    "File::Temp" => 0,
    "Test::More" => 0,
    "Text::BibTeX" => 0
  },
  "VERSION" => "0.3.0",
  "test" => {
    "TESTS" => "t/*.t"
  }
);

my %FallbackPrereqs = (
  "Algorithm::CheckDigits" => 0,
  "Data::Validate::Email" => 0,
  "Data::Validate::URI" => 0,
  "File::Temp" => 0,
  "Scalar::Util" => 0,
  "Test::More" => 0,
  "Text::BibTeX" => 0,
  "Text::sprintfn" => 0
);

unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
  delete $WriteMakefileArgs{TEST_REQUIRES};
  delete $WriteMakefileArgs{BUILD_REQUIRES};
  $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs;
}

delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
  unless eval { ExtUtils::MakeMaker->VERSION(6.52) };

WriteMakefile(%WriteMakefileArgs);
Text-BibTeX-Validate-0.3.0/lib/0000755000175000017500000000000014013404076015741 5ustar andriusandriusText-BibTeX-Validate-0.3.0/lib/Text/0000755000175000017500000000000014013404076016665 5ustar andriusandriusText-BibTeX-Validate-0.3.0/lib/Text/BibTeX/0000755000175000017500000000000014013404076020002 5ustar andriusandriusText-BibTeX-Validate-0.3.0/lib/Text/BibTeX/Validate/0000755000175000017500000000000014013404076021533 5ustar andriusandriusText-BibTeX-Validate-0.3.0/lib/Text/BibTeX/Validate/Warning.pm0000644000175000017500000000541114013404076023477 0ustar andriusandrius
package Text::BibTeX::Validate::Warning;

use strict;
use warnings;

# ABSTRACT: validation warning class
our $VERSION = '0.3.0'; # VERSION

use Text::sprintfn;

=head1 NAME

Text::BibTeX::Validate::Warning - validation warning class

=head1 SYNOPSIS

    use Text::BibTeX::Validate::Warning;

    my $warning = Text::BibTeX::Validate::Warning->new(
        'value \'%(value)s\' is better written as \'%(suggestion)s\'',
        {
            field => 'month',
            value => '1',
            suggestion => 'Jan',
        }
    );
    print STDERR "$warning\n";

=head1 DESCRIPTION

Text::BibTeX::Validate::Warning is used to store the content of
validation warning (as emitted by L<Text::BibTeX::Validate>) in a
structured way. Overloads are defined to stringify and to compare
instances of the class.

=head1 METHODS

=cut

use overload
    '""'  => \&to_string,
    'cmp' => \&_cmp;

=head2 new( $message, $fields )

Takes L<Text::sprintfn>-compatible template and a hash with the values
for replacement in the template. The hash may be omitted, in which case
the warning carries only its message. Three field names are reserved and
used as prefixes for messages if defined: C<file> for the name of a
file, C<key> for BibTeX key and C<field> for BibTeX field name. Field
C<suggestion> is also somewhat special, as L<Text::BibTeX::Validate> may
use its value to replace the original in an attempt to clean up the
BibTeX entry.

=cut

sub new
{
    my( $class, $message, $fields ) = @_;

    # Dereferencing an undefined $fields would die under strict refs,
    # so a missing hash is treated as an empty one. The template is
    # stored last so that it always wins over a 'message' key in $fields.
    my $self = { %{ $fields || {} }, message => $message };
    return bless $self, $class;
}

=head2 fields()

Returns an array of fields defined in the instance in any order.

=cut

sub fields
{
    return keys %{$_[0]};
}

=head2 get( $field )

Returns value of a field.

=cut

sub get
{
    my( $self, $field ) = @_;
    return $self->{$field};
}

=head2 set( $field, $value )

Sets a new value for a field. Returns the old value.

=cut

sub set
{
    my( $self, $field, $value ) = @_;
    ( my $old_value, $self->{$field} ) = ( $self->{$field}, $value );
    return $old_value;
}

=head2 delete( $field )

Unsets value for a field. Returns the old value.

=cut

sub delete
{
    my( $self, $field ) = @_;
    my $old_value = $self->{$field};
    delete $self->{$field};
    return $old_value;
}

=head2 to_string()

Return a string representing the warning.

=cut

sub to_string
{
    my( $self ) = @_;

    # Prefixes are prepended innermost first, so the final template
    # reads "file: key: field: message".
    my $message = $self->{message};
    $message = '%(field)s: ' . $message if exists $self->{field};
    $message = '%(key)s: '   . $message if exists $self->{key};
    $message = '%(file)s: '  . $message if exists $self->{file};

    return sprintfn $message, { %$self };
}

# Handler for the overloaded 'cmp' operator: compares the stringified
# forms of both operands. $are_swapped is true when the operands arrive
# in reverse order, in which case the sign of the result is flipped.
sub _cmp
{
    my( $lhs, $rhs, $are_swapped ) = @_;

    # The comparison must be parenthesised: '*' binds tighter than 'cmp',
    # so without the parentheses the right operand would be multiplied
    # numerically before being compared as a string.
    return ( "$lhs" cmp "$rhs" ) * ( $are_swapped ? -1 : 1 );
}

=head1 AUTHORS

Andrius Merkys, E<lt>merkys@cpan.orgE<gt>

=cut

1;
Text-BibTeX-Validate-0.3.0/lib/Text/BibTeX/Validate.pm0000644000175000017500000002061114013404076022071 0ustar andriusandrius
package Text::BibTeX::Validate;

use strict;
use warnings;

# ABSTRACT: validator for BibTeX format
our $VERSION = '0.3.0'; # VERSION

use Algorithm::CheckDigits;
use Data::Validate::Email qw( is_email_rfc822 );
use Data::Validate::URI qw( is_uri );
use Scalar::Util qw( blessed );
use Text::BibTeX::Validate::Warning;

require Exporter;
our @ISA = qw( Exporter );
our @EXPORT_OK = qw(
    clean_BibTeX
    shorten_DOI
    validate_BibTeX
);

# Full lowercase month names; position in the list is month number - 1.
my @months = qw(
    january
    february
    march
    april
    may
    june
    july
    august
    september
    october
    november
    december
);

=head1 NAME

Text::BibTeX::Validate - validator for BibTeX format

=head1 SYNOPSIS

    use Text::BibTeX;
    use Text::BibTeX::Validate qw( validate_BibTeX );

    my $bibfile = Text::BibTeX::File->new( 'bibliography.bib' );
    while( my $entry = Text::BibTeX::Entry->new( $bibfile ) ) {
        for my $warning (validate_BibTeX( $entry )) {
            print STDERR "$warning\n";
        }
    }

=head1 DESCRIPTION

Text::BibTeX::Validate checks the standard fields of BibTeX entries for
their compliance with their format. In particular, value of C<email> is
checked against RFC 822 mandated email address syntax, value of C<doi>
is checked to start with C<10.> and contain at least one C</> and so on.
Some nonstandard fields as C<isbn>, C<issn> and C<url> are also checked.
Failures of checks are returned as instances of
L<Text::BibTeX::Validate::Warning>.

=head1 METHODS

=cut

# Forward declaration: lets shorten_DOI be called without parentheses
# before its definition is seen.
sub shorten_DOI($);

=head2 validate_BibTeX( $what )

Takes plain Perl hash reference containing BibTeX fields and their
values, as well as L<Text::BibTeX::Entry> instances and returns an array
of validation messages as instances of
L<Text::BibTeX::Validate::Warning>.

=cut

# Validates one BibTeX entry.
#
# $what is either a plain hash reference of field => value pairs or a
# Text::BibTeX::Entry instance; it is normalised by _convert(), so the
# caller's data is never modified. Returns a list (possibly empty) of
# Text::BibTeX::Validate::Warning instances, in this order: undefined
# values, email, doi, month, year, isbn/issn, eprint/url, pmid.
sub validate_BibTeX
{
    my( $what ) = @_;
    my $entry = _convert( $what );

    my @warnings;

    # Report and remove empty keys
    for my $key (sort keys %$entry) {
        next if defined $entry->{$key};
        push @warnings, _warn_value( 'undefined value', $entry, $key );
        delete $entry->{$key};
    }

    if( exists $entry->{email} &&
        !defined is_email_rfc822( $entry->{email} ) ) {
        push @warnings,
             _warn_value( 'value \'%(value)s\' does not look like valid ' .
                          'email address',
                          $entry,
                          'email' );
    }

    if( exists $entry->{doi} ) {
        my $doi = $entry->{doi};
        my $doi_now = shorten_DOI( $doi );

        if( $doi_now !~ m|^10\.[^/]+/| ) {
            push @warnings,
                 _warn_value( 'value \'%(value)s\' does not look like valid DOI',
                              $entry,
                              'doi' );
        } elsif( $doi ne $doi_now ) {
            push @warnings,
                 _warn_value( 'value \'%(value)s\' is better written as \'%(suggestion)s\'',
                              $entry,
                              'doi',
                              { suggestion => $doi_now } );
        }
    }

    # Validated according to BibTeX recommendations
    if( exists $entry->{month} ) {
        my $month    = $entry->{month};
        my $month_lc = lc $month;

        # The alternation has to be grouped so that both anchors apply
        # to both branches: ungrouped, '^0?[1-9]|1[12]$' also accepts
        # values such as '2021' or '1st', which then index @months out
        # of range. The digits are captured so that the index below is
        # always a clean number.
        if( $month =~ /^(0?[1-9]|1[0-2])$/ ) {
            my $number = $1;
            push @warnings,
                 _warn_value( 'value \'%(value)s\' is better written as \'%(suggestion)s\'',
                              $entry,
                              'month',
                              { suggestion => ucfirst substr( $months[$number-1], 0, 3 ) } );
        } elsif( grep { $month_lc eq $_ && length $_ > 3 } @months ) {
            # Full month name: suggest the three-letter form, with the
            # case normalised so that 'AUGUST' yields 'Aug' like
            # 'August' does.
            push @warnings,
                 _warn_value( 'value \'%(value)s\' is better written as \'%(suggestion)s\'',
                              $entry,
                              'month',
                              { suggestion => ucfirst substr( $month_lc, 0, 3 ) } );
        } elsif( !(grep { $month_lc eq substr( $_, 0, 3 ) ||
                          $month_lc eq substr( $_, 0, 3 ) . '.' } @months) ) {
            # Neither a three-letter abbreviation nor one followed by a dot
            push @warnings,
                 _warn_value( 'value \'%(value)s\' does not look like valid month',
                              $entry,
                              'month' );
        }
    }

    if( exists $entry->{year} ) {
        # Sometimes bibliographies list the next year to show that they
        # are going to be published soon.
        my @localtime = localtime;

        if( $entry->{year} !~ /^[0-9]{4}$/ ) {
            push @warnings,
                 _warn_value( 'value \'%(value)s\' does not look like valid year',
                              $entry,
                              'year' );
        } elsif( $entry->{year} > $localtime[5] + 1901 ) {
            # $localtime[5] is the year minus 1900, so this is "later
            # than next year".
            push @warnings,
                 _warn_value( 'value \'%(value)s\' is too far in the future',
                              $entry,
                              'year' );
        }
    }

    # Both keys are nonstandard
    for my $key ('isbn', 'issn') {
        next if !exists $entry->{$key};

        my $check = CheckDigits( $key );
        if( $key eq 'isbn' ) {
            # A value with 13 characters once hyphens are removed is
            # checked as ISBN-13.
            my $value = $entry->{$key};
            $value =~ s/-//g;
            if( length $value == 13 ) {
                $check = CheckDigits( 'isbn13' );
            }
        }
        next if $check->is_valid( $entry->{$key} );

        push @warnings,
             _warn_value( 'value \'%(value)s\' does not look like valid %(FIELD)s',
                          $entry,
                          $key,
                          { FIELD => uc $key } );
    }

    # Both keys are nonstandard
    for my $key ('eprint', 'url') {
        next if !exists $entry->{$key};
        next if defined is_uri( $entry->{$key} );

        # The capture is copied into a lexical right away instead of
        # reading $1 after further function calls.
        my( $trimmed ) = $entry->{$key} =~ /^(.*)\n$/;
        if( defined $trimmed && defined is_uri( $trimmed ) ) {
            # BibTeX converted from YAML (i.e., Debian::DEP12) might
            # have trailing newline character attached.
            push @warnings,
                 _warn_value( 'URL has trailing newline character',
                              $entry,
                              $key,
                              { suggestion => $trimmed } );
            next;
        }

        push @warnings,
             _warn_value( 'value \'%(value)s\' does not look like valid URL',
                          $entry,
                          $key );
    }

    # Nonstandard
    if( exists $entry->{pmid} ) {
        if( $entry->{pmid} =~ /^PMC[0-9]{7}$/ ) {
            push @warnings,
                 _warn_value( 'PMCID \'%(value)s\' is provided instead of PMID',
                              $entry,
                              'pmid' );
        } elsif( $entry->{pmid} !~ /^[1-9][0-9]*$/ ) {
            push @warnings,
                 _warn_value( 'value \'%(value)s\' does not look like valid PMID',
                              $entry,
                              'pmid' );
        }
    }

    return @warnings;
}

=head2 clean_BibTeX( $what )

Takes the same input as C<validate_BibTeX> and attempts to reconcile
trivial issues like dropping the resolver URL part of DOIs (see
C<shorten_DOI> method) and converting month numbers into three-letter
abbreviations.

=cut

# Returns a cleaned copy of the entry as a plain hash reference: fields
# with undefined values are dropped and every suggestion offered by
# validate_BibTeX() replaces the value of the field it refers to.
sub clean_BibTeX
{
    my( $what ) = @_;
    my $entry = _convert( $what );

    # Deleting undefined values prior to the validation
    delete @{$entry}{ grep { !defined $entry->{$_} } keys %$entry };

    for my $warning (validate_BibTeX( $entry )) {
        next if !$warning->{suggestion};
        $entry->{ $warning->{field} } = $warning->{suggestion};
    }

    return $entry;
}

=head2 shorten_DOI( $doi )

Remove the resolver URL part, as well as C<doi:> prefixes, from DOIs.

=cut

sub shorten_DOI($)
{
    my( $doi ) = @_;

    # At most one prefix is removed: the 'doi:' form is only tried when
    # no resolver URL was stripped.
    $doi =~ s|^https?://(dx\.)?doi\.org/|| or $doi =~ s|^doi:||;

    return $doi;
}

# Normalises the input into a fresh hash reference with lowercased field
# names. Text::BibTeX::Entry instances are first flattened into a
# field => value hash.
sub _convert
{
    my( $what ) = @_;

    my $is_entry = blessed( $what ) && $what->isa( 'Text::BibTeX::Entry' );
    if( $is_entry ) {
        $what = { map { $_ => $what->get($_) } $what->fieldlist };
    }

    # TODO: check for duplicated keys
    my %lowercased = map { lc $_ => $what->{$_} } keys %$what;
    return \%lowercased;
}

# Builds a warning object for $field of $entry. The field name and its
# current value are always recorded; pairs from the optional $extra hash
# reference are added on top and override them on collision.
sub _warn_value
{
    my( $message, $entry, $field, $extra ) = @_;
    $extra ||= {};

    my %details = (
        field => $field,
        value => $entry->{$field},
        %$extra,
    );
    return Text::BibTeX::Validate::Warning->new( $message, \%details );
}

=head1 AUTHORS

Andrius Merkys, E<lt>merkys@cpan.orgE<gt>

=cut

1;
Text-BibTeX-Validate-0.3.0/META.json0000644000175000017500000000266714013404076016627 0ustar andriusandrius{ "abstract" : "validator for BibTeX format", "author" : [ "Andrius Merkys " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.010, CPAN::Meta::Converter version 2.150010", "license" : [ "bsd" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Text-BibTeX-Validate", "prereqs" : { "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Algorithm::CheckDigits" : "0", "Data::Validate::Email" : "0", "Data::Validate::URI" : "0", "Scalar::Util" : "0", "Text::sprintfn" : "0" } }, "test" : { "requires" : { "File::Temp" : "0", "Test::More" : "0", "Text::BibTeX" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" :
{ "web" : "https://github.com/merkys/text-bibtex-validate/issues" }, "homepage" : "https://search.cpan.org/dist/Text-BibTeX-Validate", "repository" : { "type" : "git", "url" : "git://github.com/merkys/text-bibtex-validate.git", "web" : "https://github.com/merkys/text-bibtex-validate" } }, "version" : "0.3.0", "x_serialization_backend" : "JSON::XS version 3.04" } Text-BibTeX-Validate-0.3.0/dist.ini0000644000175000017500000000113014013404076016632 0ustar andriusandriusname = Text-BibTeX-Validate author = Andrius Merkys license = BSD copyright_holder = Andrius Merkys copyright_year = 2021 version = 0.3.0 [@Filter] -bundle = @Basic -remove = License [AutoMetaResources] homepage = https://search.cpan.org/dist/%{dist} repository.github = user:merkys bugtracker.github = user:merkys [MetaJSON] [OurPkgVersion] [Prereqs] Algorithm::CheckDigits = 0 Data::Validate::Email = 0 Data::Validate::URI = 0 Scalar::Util = 0 Text::sprintfn = 0 [Prereqs / Test] -phase = test File::Temp = 0 Test::More = 0 Text::BibTeX = 0 Text-BibTeX-Validate-0.3.0/Changes0000644000175000017500000000122614013404076016467 0ustar andriusandrius0.3.0 2021-02-18 - Switching from issuing Perl warnings to returning Text::BibTeX::Validate::Warning instances. - Implementing clean_BibTeX() which cleans the BibTeX according to validation suggestions. - Suggesting replacements for month numbers and long names. 0.2.0 2021-02-15 - Renaming 'btvalidate' -> 'bibtex-validate'. - Issuing warnings as objects. - Reporting PMCIDs provided instead of PMIDs. - Reporting empty fields. - Reporting URLs with trailing newlines. 0.1.1 2021-02-03 - Adding POD. - Skipping t/03_Text_BibTeX.t if Text::BibTeX cannot be used. 0.1.0 2021-01-29 - Initial release. 
Text-BibTeX-Validate-0.3.0/META.yml0000644000175000017500000000152114013404076016443 0ustar andriusandrius--- abstract: 'validator for BibTeX format' author: - 'Andrius Merkys ' build_requires: File::Temp: '0' Test::More: '0' Text::BibTeX: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 0 generated_by: 'Dist::Zilla version 6.010, CPAN::Meta::Converter version 2.150010' license: bsd meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Text-BibTeX-Validate requires: Algorithm::CheckDigits: '0' Data::Validate::Email: '0' Data::Validate::URI: '0' Scalar::Util: '0' Text::sprintfn: '0' resources: bugtracker: https://github.com/merkys/text-bibtex-validate/issues homepage: https://search.cpan.org/dist/Text-BibTeX-Validate repository: git://github.com/merkys/text-bibtex-validate.git version: 0.3.0 x_serialization_backend: 'YAML::Tiny version 1.70' Text-BibTeX-Validate-0.3.0/LICENSE0000644000175000017500000000273314013404076016205 0ustar andriusandriusCopyright (c) The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Text-BibTeX-Validate-0.3.0/t/0000755000175000017500000000000014013404076015436 5ustar andriusandriusText-BibTeX-Validate-0.3.0/t/01_use.t0000644000175000017500000000014214013404076016714 0ustar andriusandrius
use strict;
use warnings;

use Test::More tests => 1;
BEGIN { use_ok('Text::BibTeX::Validate') };
Text-BibTeX-Validate-0.3.0/t/02_invalid.t0000644000175000017500000000254414013404076017557 0ustar andriusandrius
use strict;
use warnings;

use Test::More;
use Text::BibTeX::Validate qw( clean_BibTeX validate_BibTeX );

# Each case is [ input, expected warning text, expected cleaned entry ].
# An undefined warning text means that no warning is expected; when the
# third element is absent, cleaning is expected to leave the input as is.
my @cases = (
    [ { doi => 'not a DOI' },
      'doi: value \'not a DOI\' does not look like valid DOI' ],
    [ { doi => 'http://doi.org/10.1234/567890' },
      'doi: value \'http://doi.org/10.1234/567890\' is better written as \'10.1234/567890\'',
      { doi => '10.1234/567890' } ],

    [ { isbn => '0-306-40615-2' }, undef ],
    [ { isbn => '0-306-40615-X' },
      'isbn: value \'0-306-40615-X\' does not look like valid ISBN' ],

    [ { month => '02' },
      'month: value \'02\' is better written as \'Feb\'',
      { month => 'Feb' } ],
    [ { month => 'August' },
      'month: value \'August\' is better written as \'Aug\'',
      { month => 'Aug' } ],
    [ { month => 'may' }, undef ],

    [ { pmid => 'PMC1234567' },
      'pmid: PMCID \'PMC1234567\' is provided instead of PMID' ],

    [ { url => "https://example.com\n" },
      'url: URL has trailing newline character',
      { url => 'https://example.com' } ],
);

plan tests => 3 * scalar @cases;

for my $case (@cases) {
    my @warnings = validate_BibTeX( $case->[0] );
    my $clean = clean_BibTeX( $case->[0] );

    is( scalar @warnings, defined $case->[1] ? 1 : 0 );
    is( @warnings ? "$warnings[0]" : undef, $case->[1] );
    is_deeply( $clean, @$case == 3 ? $case->[2] : $case->[0] );
}
Text-BibTeX-Validate-0.3.0/t/03_Text_BibTeX.t0000644000175000017500000000232414013404076020247 0ustar andriusandrius
use strict;
use warnings;

use Test::More;

# Text::BibTeX is needed only by this test, so the whole file is skipped
# when the module cannot be loaded.
eval 'use Text::BibTeX';
plan skip_all => 'Text::BibTeX required' if $@;

use File::Temp;
use Text::BibTeX::Validate qw( validate_BibTeX );

plan tests => 2;

my $tmp = File::Temp->new();

# A failure to write the fixture must stop the test right here instead
# of surfacing later as a confusing parse or validation failure.
open( my $fh, '>', $tmp->filename )
    or die 'cannot open ' . $tmp->filename . " for writing: $!";

print $fh <<'END';
@Article{Merkys2016,
  author    = {Merkys, Andrius and Vaitkus, Antanas and Butkus, Justas and Okuli{\v{c}}-Kazarinas, Mykolas and Kairys, Visvaldas and Gra{\v{z}}ulis, Saulius},
  journal   = {Journal of Applied Crystallography},
  title     = {{\it COD::CIF::Parser}: an error-correcting {CIF} parser for the {P}erl language},
  year      = {2016},
  month     = {Feb},
  number    = {1},
  pages     = {292--301},
  volume    = {49},
  doi       = {10.1107/S1600576715022396},
  url       = {http://dx.doi.org/10.1107/S1600576715022396},
}
END

# Buffered write errors only show up when the handle is closed.
close( $fh )
    or die 'cannot close ' . $tmp->filename . ": $!";

my $bibfile = Text::BibTeX::File->new( $tmp->filename );
my $entry = Text::BibTeX::Entry->new( $bibfile );

my $warning;

( $warning ) = validate_BibTeX( $entry );
is( $warning, undef );

$entry->set( 'doi', 'doi/10.1107/S1600576715022396' );

( $warning ) = validate_BibTeX( $entry );
is( "$warning", 'doi: value \'doi/10.1107/S1600576715022396\' does not look like valid DOI' );
Text-BibTeX-Validate-0.3.0/MANIFEST0000644000175000017500000000043614013404076016327 0ustar andriusandrius# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.010. Changes LICENSE MANIFEST META.json META.yml Makefile.PL README dist.ini lib/Text/BibTeX/Validate.pm lib/Text/BibTeX/Validate/Warning.pm scripts/bibtex-validate t/01_use.t t/02_invalid.t t/03_Text_BibTeX.t