Text-Bidi-2.12 000755 001750 000144 0 12774222404 12666 5 ustar 00moshe users 000000 000000 README 100644 001750 000144 30232 12774222404 13647 0 ustar 00moshe users 000000 000000 Text-Bidi-2.12 NAME
Text::Bidi - Unicode bidi algorithm using libfribidi
VERSION
version 2.12
SYNOPSIS
# Each displayed line is a "paragraph"
use Text::Bidi qw(log2vis);
($par, $visual) = log2vis($logical);
# or just
$visual = log2vis(...);
# For real paragraphs, need to specify the display width
($par, $visual) = log2vis($logical, $width);
# object oriented approach allows one to display line by line
$p = new Text::Bidi::Paragraph $logical;
$visual = $p->visual($off, $len);
EXPORT
The following functions can be exported (nothing is exported by
default):
* "log2vis"
* "is_bidi"
* "get_mirror_char"
* "get_bidi_type_name"
* "fribidi_version"
* "unicode_version"
* "fribidi_version_num"
All of them can be exported together using the ":all" tag.
DESCRIPTION
This module provides basic support for the Unicode bidirectional (Bidi)
text algorithm, for displaying text consisting of both left-to-right and
right-to-left written languages (such as Hebrew and Arabic.) It does so
via a *swig* interface file to the *libfribidi* library.
The fundamental purpose of the bidi algorithm is to reorder text given
in logical order into text in visually correct order, suitable for
display using standard printing commands. ``Logical order'' means that
the characters are given in the order in which they would be read if
printed correctly. The direction of the text is determined by properties
of the Unicode characters, usually without additional hints. See Unicode
Standard Annex #9 (<http://www.unicode.org/reports/tr9/>) for more details
on the problem and the algorithm.
Standard usage
The bidi algorithm works in two stages. The first is on the level of a
paragraph, where the direction of each character is computed. The second
is on the level of the lines to be displayed. The main practical
difference is that the first stage requires only the text of the
paragraph, while the second requires knowledge of the width of the
displayed lines. The module (or the library) does not determine how the
text is broken into paragraphs.
The full interface is provided by Text::Bidi::Paragraph, see there for
details. This module provides an abbreviation, "log2vis", which combines
creating a paragraph object with calling "visual" in
Text::Bidi::Paragraph on it. It is particularly useful in the case that
the whole paragraph should be displayed at once, and the display width
is known:
$visual = log2vis($logical, $width);
There are more options (see "log2vis"), but this is essentially it. The
rest of this documentation will probably be useful only to people who
are familiar with *libfribidi* and who wish to extend or modify the
module.
The object-oriented approach
All functions here can be called using either a procedural or an object
oriented approach. For example, you may do either
$visual = log2vis($logical);
or
$bidi = new Text::Bidi;
$visual = $bidi->log2vis($logical);
The advantages of the second form are that it is easier to move to a
sub-class, and that two or more objects with different parameters can be
used simultaneously. If you are interested in deriving from this class,
please see "SUBCLASSING".
FUNCTIONS
get_bidi_type_name
say $tb->get_bidi_type_name($Text::Bidi::Type::LTR); # says 'LTR'
Return the string representation of a Bidi character type, as in
fribidi_get_bidi_type_name(3). Note that for the above example, one
needs to use Text::Bidi::Constants.
log2vis
($p, $visual) = log2vis($logical[,$width[,$dir[,$flags]]]);
Convert the input paragraph $logical to visual. This constructs a
Text::Bidi::Paragraph object, and calls "visual" in
Text::Bidi::Paragraph several times, as required. $width is the maximum
width of a line, defaulting to the whole length of the paragraph. $dir
is the base direction of the paragraph, determined automatically if not
provided. $flags is as in "visual" in Text::Bidi::Paragraph. The
paragraph will be justified to the right if it is RTL.
The output consists of the Text::Bidi::Paragraph object $p and the
visual string $visual.
is_bidi()
my $bidi = is_bidi($logical);
Returns true if the input $logical contains bidi characters. Otherwise,
the output of the bidi algorithm will be identical to the input, hence
this helps if we want to short-circuit.
get_mirror_char()
my $mir = get_mirror_char('['); # $mir == ']'
Return the mirror character of the input, possibly itself.
fribidi_version
say fribidi_version();
Returns the version information for the fribidi library
fribidi_version_num
say fribidi_version_num();
Returns the version number for the fribidi library
unicode_version
say unicode_version();
Returns the Unicode version used by the fribidi library
SUBCLASSING
The rest of the documentation is only interesting if you would like to
derive from this class. The methods listed under "METHODS" are wrappers
around the similarly named functions in libfribidi, and may be useful
for this purpose.
If you do sub-class this class, and would like the procedural interface
to use your functions, put a line like
$Text::Bidi::GlobalClass = __PACKAGE__;
in your module.
METHODS
new
$tb = new Text::Bidi [tie_byte => ..., tie_long => ...];
Create a new Text::Bidi object. If the *tie_byte* or *tie_long* options
are given, they should be the names (strings) of the classes used as
dual life arrays, most probably classes derived from
Text::Bidi::Array::Byte and Text::Bidi::Array::Long, respectively.
This method is probably of little interest for standard (procedural)
use.
utf8_to_internal
$la = $tb->utf8_to_internal($str);
Convert the Perl string *$str* into the representation used by
libfribidi. The result will be a Text::Bidi::Array::Long.
internal_to_utf8
$str = $tb->internal_to_utf8($la);
Convert the long array *$la*, representing a string encoded in the format
used by libfribidi, into a Perl string. The array *$la* can be either a
Text::Bidi::Array::Long, or anything that can be used to construct it.
get_bidi_types
$types = $tb->get_bidi_types($internal);
Returns a Text::Bidi::Array::Long with the list of Bidi types of the
text given by $internal, a representation of the paragraph text, as
returned by utf8_to_internal(). Wraps fribidi_get_bidi_types(3).
get_joining_types
$types = $tb->get_joining_types($internal);
Returns a Text::Bidi::Array::Byte with the list of joining types of the
text given by $internal, a representation of the paragraph text, as
returned by "utf8_to_internal". Wraps fribidi_get_joining_types(3).
get_joining_type_name
say $tb->get_joining_type_name($Text::Bidi::Joining::U); # says 'U'
Return the string representation of a joining character type, as in
fribidi_get_joining_type_name(3). Note that for the above example, one
needs to use Text::Bidi::Constants.
get_par_embedding_levels
($odir, $lvl) = $tb->get_par_embedding_levels($types[, $dir]);
Return the embedding levels of the characters, whose types are given by
*$types*. *$types* is a Text::Bidi::Array::Long of Bidi types, as
returned by "get_bidi_types". *$dir* is the base paragraph direction. If
not given, it defaults to "FRIBIDI_PAR_ON" (neutral).
The output is the resolved paragraph direction *$odir*, and the
Text::Bidi::Array::Byte array *$lvl* of embedding levels.
join_arabic
$props = $tb->join_arabic($bidi_types, $lvl, $join_types);
Returns a Text::Bidi::Array::Byte with $props, as returned by
fribidi_join_arabic(3). The inputs are $bidi_types, as returned by
"get_bidi_types", $lvl, as returned by "get_par_embedding_levels", and
$join_types as returned by "get_joining_types". Wraps
fribidi_join_arabic(3).
shaped
($newp, $shaped) = $tb->shaped($flags, $lvl, $prop, $internal);
Returns the internal representation of the paragraph, with shaping
applied. The internal representation of the original paragraph (as
returned by "utf8_to_internal") should be passed in $internal, while the
embedding levels (as returned by "get_par_embedding_levels") should be
in $lvl. See the documentation of fribidi-arabic.h for $flags, but as a
special case, a value of "undef" here skips shaping (returning ($prop,
$internal)), while any other false value becomes the default. $prop is
as returned by "join_arabic". This method wraps fribidi_shape_arabic(3).
mirrored
$mirrored = $tb->mirrored($lvl, $internal);
Returns the internal representation of the paragraph, with mirroring
applied. The internal representation of the original paragraph (as
returned by "utf8_to_internal") should be passed in $internal, while the
embedding levels (as returned by "get_par_embedding_levels") should be
in $lvl. This method wraps fribidi_shape_mirroring(3).
reorder
$str = $tb->reorder($in, $map[, $offset[, $len]]);
say $tb->reorder([qw(A B C)], [2, 0, 1]); # says CAB
View the array ref $map as a permutation, and permute the list (of
characters) $in according to it. The result is joined, to obtain a
string. If $offset and $len are given, returns only that part of the
resulting string.
reorder_map
($elout, $mout) = $tb->reorder_map($types, $offset, $len, $par,
$map, $el, $flags);
Compute the reordering map for bidi types given by $types, for the
interval starting with $offset of length $len. Note that this part of
the algorithm depends on the interval in an essential way. $types is an
array of types, as computed by "get_bidi_types". The other arguments are
optional:
$par
The base paragraph direction. Computed via
"get_par_embedding_levels" if not defined.
$map
An array ref (or a Text::Bidi::Array::Long) from a previous call
(with a different interval). The method is called repeatedly for the
same paragraph, with different intervals, and the reordering map is
updated for the given interval. If not defined, initialised to the
identity map.
$el The embedding levels. If not given, computed by a call to
"get_par_embedding_levels".
$flags
A specification of flags, as described in fribidi_reorder_line(3).
The flags can be given either as a number (using
"$Text::Bidi::Flags::.." from Text::Bidi::Constants), or as a
hashref of the form "{REORDER_NSM => 1}". Defaults to
"FRIBIDI_FLAGS_DEFAULT".
The output consists of the modified map $mout (a
Text::Bidi::Array::Long), and possibly modified embedding levels $elout.
remove_bidi_marks
($v, $to, $from, $levels) =
$tb->remove_bidi_marks($v[, $to[, $from[, $levels]]])
Remove the explicit bidi marks from $v. The optional arguments, if
given, are the map from the logical to the visual string, the inverse
map, and embedding levels, respectively, as returned by "reorder_map".
The inverse map $from can be obtained from the direct one $to by a
command like:
@$from[@$to] = 0..$#$to
Each of the arguments can be "undef", in which case it will be skipped.
This implements step X9, see fribidi_remove_bidi_marks(3).
BUGS
There are no real tests for any of this.
Shaping is not supported (probably), since I don't know what it is. Help
welcome!
SEE ALSO
Text::Bidi::Paragraph
Text::Bidi::Constants
Encode
The fribidi library
Swig
The unicode bidi algorithm
AUTHOR
Moshe Kamensky
COPYRIGHT AND LICENSE
This software is copyright (c) 2015 by Moshe Kamensky.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
Changes 100644 001750 000144 6660 12774222404 14252 0 ustar 00moshe users 000000 000000 Text-Bidi-2.12 Revision history for Perl module Text::Bidi
2.12 2016-10-02 18:17:21+03:00 Asia/Jerusalem
* Added META comments to misc/bidi
* Misc build and install changes, issues #108736, #108738 (Thanks
ppisar@redhat.com)
2.11 2015-11-08 01:08:59+02:00 Asia/Jerusalem
* Fix versioning issue (https://rt.cpan.org/Public/Bug/Display.html?id=108498)
* Fixed a bug in combining characters (Niqqud) handling, and added
support for it in the urxvt plugin
2.10 2015-10-17 20:53:51+03:00 Asia/Jerusalem
* Implemented Arabic joining and shaping (thanks to Dmitry Karasik)
2.09 2014-06-03 09:51:08IDT+0300 Asia/Jerusalem
* Fixed pod language (rt #96158)
* Added missing ; in Makefile.PL
* Made ucd.t optional via the TEXT_BIDI_SKIP_UCD env variable
2.08 2013-10-18 15:48:39 Asia/Jerusalem
* Made ucd.t less fail prone on older versions of Perl and fribidi
* Added fribidi_version_num to Text::Bidi
2.07 2013-10-17 12:47:51 Asia/Jerusalem
* Added functions for retrieving version of libfribidi and Unicode
* Added method type_names() to Text::Bidi::Paragraph
* get_bidi_type_name() in Text::Bidi is now exportable
* Added debugging options --hex, --types, --verbose ... to fribidi.pl
* Fixed bugs in fribidi.pl
* Fixed bug with variable interpolation in Constants.pm
2.06 2013-10-15 23:28:06 Asia/Jerusalem
* Made log2vis() more useful
* Bug fixes
* Added some tests
* Improved docs
2.05 2013-09-23 11:56:35 Asia/Jerusalem
* Added a README.mkdn file
2.04 2013-09-23 11:46:51 Asia/Jerusalem
* The Fribidi constants are now loaded via a separate module,
Text::Bidi::Constants
* Switching to Dist::Zilla
2.03 2013-09-20 12:09:39 Asia/Jerusalem
* More changes to Makefile.PL
2.02 2013-09-12
* Fixed Makefile.PL
2.01 2013-09-11
* Refactored Text::Bidi::Array::* to have a common base class
Text::Bidi::Array
* Significantly improved the documentation of Text::Bidi::Array*
* Fixed a bug in Text::Bidi::Array::Long --- now returns the correct
size
* Added loading tests for Text::Bidi::Array*
* Removed "no warnings 'experimental'", since this caused tests to
fail on older versions of perl.
* Added information about the bidi extension for urxvt to the README
file
2.00 2013-09-10
* Almost completely rewritten, mostly due to the changes in
libfribidi. The interface has changed as well, so this version is
incompatible with the previous ones.
0.04 2008-02-07
* Changed the build system to use ExtUtils::PkgConfig (if available),
to find the correct compilation flags (this is required since the
new version of libfribidi may need glib to build.)
0.03 2007-01-18
* The PUSH macros caused some weird results, replaced with my own
macros
* Apparently, Newx did not exist before 5.8.8, defined it if does not
exist
0.02 2007-01-17
* Fixed memory leak in remove_bidi_marks when called with two
arguments
* Fixed bug when calling log2vis with 2 arguments
* Changed the typemaps to use more perlapi macros (XPUSH, etc.)
0.01 2006-08-28
* First version
t 000755 001750 000144 0 12774222404 13052 5 ustar 00moshe users 000000 000000 Text-Bidi-2.12 ucd.t 100644 001750 000144 10465 12774222404 14200 0 ustar 00moshe users 000000 000000 Text-Bidi-2.12/t #!/usr/bin/env perl
# $Id$
use 5.10.0;
use warnings;
use integer;
use open qw[:encoding(utf-8) :std];
use charnames qw(:full :short);
use version 0.77;
# Apply the :utf8 layer to STDOUT and STDERR at compile time (BEGIN block).
BEGIN {
binmode STDOUT => ':utf8';
binmode STDERR => ':utf8';
}
use Test::More;
# Escape hatch: a true TEXT_BIDI_SKIP_UCD environment variable skips this
# whole test file; the skip reason echoes the variable's value.
plan skip_all => "'\$TEXT_BIDI_SKIP_UCD' set to $ENV{TEXT_BIDI_SKIP_UCD}"
if $ENV{'TEXT_BIDI_SKIP_UCD'};
# Return the characters whose code points run from the first argument to
# the second argument, inclusive.
sub crange {
    my ($first, $last) = @_;
    return map { chr($_) } $first .. $last;
}
# Table of sample characters, keyed by bidi class name (L, R, AL, ...).
# Each value is an array ref of characters; char() below picks one of them at
# random. The 'qw' warnings are disabled because several qw() lists below
# deliberately contain '#' and ',' as list items.
no warnings 'qw';
my %char = (
L => ['A'..'Z','a'..'z',"\N{LEFT-TO-RIGHT MARK}"],
R => [crange(ord("\N{hebrew:alef}"), ord("\N{hebrew:tav}")), "\N{RIGHT-TO-LEFT MARK}"],
AL => [crange(ord("\N{arabic:alef}"), ord("\N{arabic:yeh}")), chr(0x61c)],
EN => ['0'..'9'],
ES => [qw(+ -)],
ET => [qw(# $ %), crange(0xa2,0xa5)],
AN => [crange(0x600, 0x604)],
CS => [qw(, . / :), chr(0xa0), chr(0x60c)],
NSM => [crange(0x300, 0x36f),crange(0x610, 0x61a)],
BN => [crange(0, 8), crange(0xe, 0x1b), crange(0x7f, 0x84), crange(0x86, 0x9f)],
B => [map { chr } (0xa, 0xd, 0x1c..0x1e, 0x85, 0x2029)],
S => [chr(9), chr(0xb), chr(0x1f)],
WS => [chr(0xc), ' '],
ON => [qw(! " & ' * ; < = > ? @ [ \ ] ^ _ ` { | } ~), chr(0x606), chr(0x60e)],
LRE => ["\N{LEFT-TO-RIGHT EMBEDDING}"],
LRO => ["\N{LEFT-TO-RIGHT OVERRIDE}"],
RLE => ["\N{RIGHT-TO-LEFT EMBEDDING}"],
RLO => ["\N{RIGHT-TO-LEFT OVERRIDE}"],
PDF => ["\N{POP DIRECTIONAL FORMATTING}"],
LRI => [chr(0x2066)],
RLI => [chr(0x2067)],
FSI => [chr(0x2068)],
PDI => [chr(0x2069)],
);
use Text::Bidi qw(log2vis get_bidi_type_name unicode_version);
# Skip the whole file when unicode_version() reports a version below 6.0.0.
# The check sits in a BEGIN block, so it runs at compile time, as soon as
# Text::Bidi has been loaded by the 'use' line above.
BEGIN {
plan skip_all => 'libfribidi Unicode version too old'
if version->parse(unicode_version()) < v6.0.0;
}
use Text::Bidi::Constants;
# Return one randomly chosen sample character for the bidi class named by
# the first argument, taken from the file-level %char table.
sub char {
    my ($class) = @_;
    my $candidates = $char{$class};
    my $pick = int(rand(scalar(@$candidates)));
    return $candidates->[$pick];
}
# Expand a bit mask into the list of paragraph directions to test.
# Bit 1 selects $Text::Bidi::Par::ON, bit 2 selects $Text::Bidi::Par::LTR and
# bit 4 selects $Text::Bidi::Par::RTL; the result keeps that order.
sub dirs {
    my ($mask) = @_;
    my @bit_to_dir = (
        [ 1, $Text::Bidi::Par::ON ],
        [ 2, $Text::Bidi::Par::LTR ],
        [ 4, $Text::Bidi::Par::RTL ],
    );
    return map { $_->[1] } grep { $mask & $_->[0] } @bit_to_dir;
}
{
    use Data::Dumper;
    # A single dumper shared by every escape() call, configured for terse,
    # unindented output with double-quoted (escaped) strings.
    my $dumper = Data::Dumper->new([]);
    $dumper->Terse(1);
    $dumper->Indent(0);
    $dumper->Useqq(1);

    # Return the Data::Dumper rendering of the arguments.
    sub escape {
        $dumper->Values(\@_);
        return $dumper->Dump;
    }
}
# Open the UCD conformance data file. If it cannot be opened, skip the whole
# test file instead of failing.
open my $fh, '<', 't/BidiTest.txt'
    or plan skip_all => "can't open UCD datafile: $!";
# Open the list of known failures; likewise skip everything if it is missing.
open my $err, '<', 't/known.txt'
    or plan skip_all => "Can't open known errors file: $!";
# %known maps the text before the first ": " separator that the greedy match
# settles on to the text after it, one entry per matching line of
# t/known.txt. Lines without ": " are ignored.
our %known;
# Read one line at a time rather than pulling the whole file into a list.
while ( my $line = <$err> ) {
    next unless $line =~ /^(.*): (.*)$/;
    $known{$1} = $2;
}
# we don't reorder NSM
my $flags = $Text::Bidi::Flags::DEFAULT & ~$Text::Bidi::Flag::REORDER_NSM;
foreach ( <$fh> ) {
next if /^\s*(#|$)/;
chomp;
if ( /^\@Levels:\s*(.*)/ ) {
@levels = split ' ', $1;
@levund = grep { $levels[$_] eq 'x' } 0..$#levels;
%levund = ();
$levund{$_} = 1 foreach @levund;
next;
}
if ( /^\@Reorder:\s*(.*)/ ) { @reorder = split ' ', $1; next }
if ( /^([A-Z ]*); ([1-7])/ ) {
my $bits = $2;
my $ing = $1;
my @chars = map { char($_) } (split ' ', $ing);
my @ords = map { ord } @chars;
my $in = join ('', @chars);
my $ine = escape($in);
for my $pdir ( dirs($bits) ) {
SKIP: {
my $pdname = get_bidi_type_name($pdir);
skip 'Test fails in libfribidi', 2
if defined $known{"$ing;$pdname"};
my ($p, $vis) = log2vis($in, length($in), $pdir, $flags);
my $lev = $p->levels;
my @olev = @$lev;
$olev[$_] = 'x' foreach @levund;
local $" = ',';
my @int = @{$p->_unicode};
my @types = $p->type_names;
my $cpdname = get_bidi_type_name($p->dir);
#say $err "$ing;$pdname: @olev > @levels % $in" unless
is("@olev", "@levels", <map;
my @map = grep { not $levund{$_} } @$map;
is("@map", "@reorder", <