unicode-display_width-3.2.0/0000755000004100000410000000000015061663675016053 5ustar www-datawww-dataunicode-display_width-3.2.0/data/0000755000004100000410000000000015061663675016764 5ustar www-datawww-dataunicode-display_width-3.2.0/data/display_width.marshal.gz0000644000004100000410000000402415061663675023620 0ustar www-datawww-dataٽh7@I+6u0hp@\$nq"C \%($EsSȵD' *BoOleLdIE1$&̔rHTauU]Te\]fknUWeg-{ "ETXmv^2eh/b_z^AݲyR2LBW>ZB;>k k>W2uu_fVx7J"-8 )Z%d,ia0"c$[zK^z庠YFͩ Kȵ1l4FjrwL}Wgٲ/_.=X8NmKOŲGz"UEw%INɀtKyd}'Y4]{|eOZ0օzǸxP*R"W5[ő[WF5?酭*"fRLm+ B@r8.˚sڞp|f6Ix xoE ^Cj $[.P c.Κ)Bo11dزS,WP !' "ZA* yy:<}ݮOjn"L'a[ÕlpM[iB\PVP x#GMs )S *b!pc j G`5 2+ ^|h,MK=& pX=ttUܦMȫ4D!wx@9t6#-W7o7l*M[Bra0ՠ>ױ&"{qo^߷tF e`]/D#$,)aն 3}檝C1Q nkF`an#ƈ1B/#2޹1bn`l:ĿC+uζmW13 "EPNGFR.Jo~Yzx?)+G9&uɤ{0Qijt H1Ɓ$;/%$2 o6D `W/JoݫJTyH0AX/|&EduRIZ~W8+K~LG蘚֢?H]Qq!6إ!}w;jƖfbn*4]MB"CUa Bù0nsǐzbD#;]PCsPaRsǐ%$8@Y*sC\7s1suP?ɲΖ cWF n;} O.fn;BعGghCunicode-display_width-3.2.0/lib/0000755000004100000410000000000015061663675016621 5ustar www-datawww-dataunicode-display_width-3.2.0/lib/unicode/0000755000004100000410000000000015061663675020247 5ustar www-datawww-dataunicode-display_width-3.2.0/lib/unicode/display_width/0000755000004100000410000000000015061663675023113 5ustar www-datawww-dataunicode-display_width-3.2.0/lib/unicode/display_width/string_ext.rb0000644000004100000410000000040415061663675025624 0ustar www-datawww-data# frozen_string_literal: true require_relative "../display_width" class String def display_width(ambiguous = nil, overwrite = nil, old_options = {}, **options) Unicode::DisplayWidth.of(self, ambiguous, overwrite, old_options = {}, **options) end end unicode-display_width-3.2.0/lib/unicode/display_width/no_string_ext.rb0000644000004100000410000000046215061663675026324 0ustar www-datawww-data# frozen_string_literal: true warn "You are loading 'unicode-display_width/no_string_ext'\n" \ "Beginning with version 2.0, this is not necessary anymore\n"\ "You can just require 'unicode-display_width' now and no\n"\ "string extension will be loaded" require_relative "../display_width" unicode-display_width-3.2.0/lib/unicode/display_width/reline_ext.rb0000644000004100000410000000044315061663675025577 0ustar www-datawww-data# Experimental # Patches Reline's get_mbchar_width to use Unicode::DisplayWidth require "reline" require "reline/unicode" require_relative "../display_width" class Reline::Unicode def self.get_mbchar_width(mbchar) Unicode::DisplayWidth.of(mbchar, Reline.ambiguous_width) end end unicode-display_width-3.2.0/lib/unicode/display_width/constants.rb0000644000004100000410000000042615061663675025456 0ustar www-datawww-data# frozen_string_literal: true module Unicode class DisplayWidth VERSION = "3.2.0" UNICODE_VERSION = "17.0.0" DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/") INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz" end end unicode-display_width-3.2.0/lib/unicode/display_width/index.rb0000644000004100000410000000144715061663675024555 0ustar www-datawww-data# frozen_string_literal: true require "zlib" require_relative "constants" module Unicode class DisplayWidth File.open(INDEX_FILENAME, "rb") do |file| serialized_data = Zlib::GzipReader.new(file).read serialized_data.force_encoding Encoding::BINARY INDEX = Marshal.load(serialized_data) end def self.decompress_index(index, level) index.flat_map{ |value| if level > 0 if value.instance_of?(Array) value[15] ||= nil decompress_index(value, level - 1) else decompress_index([value] * 16, level - 1) end else if value.instance_of?(Array) value[15] ||= nil value else [value] * 16 end end } end end end unicode-display_width-3.2.0/lib/unicode/display_width/emoji_support.rb0000644000004100000410000000250515061663675026341 0ustar www-datawww-data# frozen_string_literal: true module Unicode class DisplayWidth module EmojiSupport # Tries to find out which terminal emulator is used to # set emoji: config to best suiting value # # Please also see section in README.md and # misc/terminal-emoji-width.rb # # Please note: Many terminals do not set any ENV vars, # maybe CSI queries can help? def self.recommended @recommended ||= _recommended end def self._recommended if ENV["CI"] return :rqi end case ENV["TERM_PROGRAM"] when "iTerm.app" return :all when "Apple_Terminal" return :rgi_at when "WezTerm" return :all_no_vs16 end case ENV["TERM"] when "contour","foot" # konsole: all, how to detect? return :all when /kitty/ return :vs16 end if ENV["WT_SESSION"] # Windows Terminal return :vs16 end # As of last time checked: gnome-terminal, vscode, alacritty :none end # Maybe: Implement something like https://github.com/jquast/ucs-detect # which uses the terminal cursor to check for best support level # at runtime # def self.detect! # end end end end unicode-display_width-3.2.0/lib/unicode/display_width.rb0000644000004100000410000001671115061663675023446 0ustar www-datawww-data# frozen_string_literal: true require "unicode/emoji" require_relative "display_width/constants" require_relative "display_width/index" require_relative "display_width/emoji_support" module Unicode class DisplayWidth DEFAULT_AMBIGUOUS = 1 INITIAL_DEPTH = 0x10000 ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n-\x0F]/ ASCII_NON_ZERO_STRING = "\0\x05\a\b\n-\x0F" ASCII_BACKSPACE = "\b" AMBIGUOUS_MAP = { 1 => :WIDTH_ONE, 2 => :WIDTH_TWO, } FIRST_AMBIGUOUS = { WIDTH_ONE: 768, WIDTH_TWO: 161, } NOT_COMMON_NARROW_REGEX = { WIDTH_ONE: /[^\u{10}-\u{2FF}]/m, WIDTH_TWO: /[^\u{10}-\u{A1}]/m, } FIRST_4096 = { WIDTH_ONE: decompress_index(INDEX[:WIDTH_ONE][0][0], 1), WIDTH_TWO: decompress_index(INDEX[:WIDTH_TWO][0][0], 1), } EMOJI_SEQUENCES_REGEX_MAPPING = { rgi: :REGEX_INCLUDE_MQE_UQE, rgi_at: :REGEX_INCLUDE_MQE_UQE, possible: :REGEX_WELL_FORMED, } REGEX_EMOJI_VS16 = Regexp.union( Regexp.compile( Unicode::Emoji::REGEX_TEXT_PRESENTATION.source + "(? 15 && codepoint < first_ambiguous width += 1 elsif codepoint < 0x1001 width += index_low[codepoint] || 1 else d = INITIAL_DEPTH w = index_full[codepoint / d] while w.instance_of? Array w = w[(codepoint %= d) / (d /= 16)] end width += w || 1 end } # Return result + prevent negative lengths width < 0 ? 0 : width end # Returns width of custom overwrites and remaining string def self.width_custom(string, overwrite) width = 0 string = string.each_codepoint.select{ |codepoint| if overwrite[codepoint] width += overwrite[codepoint] nil else codepoint end }.pack("U*") [width, string] end # Returns width for ASCII-only strings. Will consider zero-width control symbols. def self.width_ascii(string) if string.match?(ASCII_NON_ZERO_REGEX) res = string.delete(ASCII_NON_ZERO_STRING).bytesize - string.count(ASCII_BACKSPACE) return res < 0 ? 0 : res end string.bytesize end # Returns width of all considered Emoji and remaining string def self.emoji_width(string, mode = :all, ambiguous = DEFAULT_AMBIGUOUS) res = 0 if emoji_set_regex = EMOJI_SEQUENCES_REGEX_MAPPING[mode] emoji_width_via_possible( string, Unicode::Emoji.const_get(emoji_set_regex), mode == :rgi_at, ambiguous, ) elsif mode == :all_no_vs16 no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES){ res += 2; "" } [res, no_emoji_string] elsif mode == :vs16 no_emoji_string = string.gsub(REGEX_EMOJI_VS16){ res += 2; "" } [res, no_emoji_string] elsif mode == :all no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ res += 2; "" } [res, no_emoji_string] else [0, string] end end # Match possible Emoji first, then refine def self.emoji_width_via_possible(string, emoji_set_regex, strict_eaw = false, ambiguous = DEFAULT_AMBIGUOUS) res = 0 # For each string possibly an emoji no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ |emoji_candidate| # Check if we have a combined Emoji with width 2 (or EAW an Apple Terminal) if emoji_candidate == emoji_candidate[emoji_set_regex] if strict_eaw res += self.of(emoji_candidate[0], ambiguous, emoji: false) else res += 2 end "" # We are dealing with a default text presentation emoji or a well-formed sequence not matching the above Emoji set else if !strict_eaw # Ensure all explicit VS16 sequences have width 2 emoji_candidate.gsub!(REGEX_EMOJI_VS16){ res += 2; "" } end emoji_candidate end } [res, no_emoji_string] end def self.normalize_options(string, ambiguous = nil, overwrite = nil, old_options = {}, **options) unless old_options.empty? warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}" options.merge! old_options end options[:ambiguous] = ambiguous if ambiguous options[:ambiguous] ||= DEFAULT_AMBIGUOUS if options[:ambiguous] != 1 && options[:ambiguous] != 2 raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2" end if overwrite && !overwrite.empty? warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}" options[:overwrite] = overwrite end options[:overwrite] ||= {} if [nil, true, :auto].include?(options[:emoji]) options[:emoji] = EmojiSupport.recommended elsif options[:emoji] == false options[:emoji] = :none end options end def initialize(ambiguous: DEFAULT_AMBIGUOUS, overwrite: {}, emoji: true) @ambiguous = ambiguous @overwrite = overwrite @emoji = emoji end def get_config(**kwargs) { ambiguous: kwargs[:ambiguous] || @ambiguous, overwrite: kwargs[:overwrite] || @overwrite, emoji: kwargs[:emoji] || @emoji, } end def of(string, **kwargs) self.class.of(string, **get_config(**kwargs)) end end end unicode-display_width-3.2.0/unicode-display_width.gemspec0000644000004100000410000000423415061663675023713 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: unicode-display_width 3.2.0 ruby lib Gem::Specification.new do |s| s.name = "unicode-display_width".freeze s.version = "3.2.0".freeze s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.metadata = { "bug_tracker_uri" => "https://github.com/janlelis/unicode-display_width/issues", "changelog_uri" => "https://github.com/janlelis/unicode-display_width/blob/main/CHANGELOG.md", "rubygems_mfa_required" => "true", "source_code_uri" => "https://github.com/janlelis/unicode-display_width" } if s.respond_to? :metadata= s.require_paths = ["lib".freeze] s.authors = ["Jan Lelis".freeze] s.date = "2025-09-09" s.description = "[Unicode 17.0.0] Determines the monospace display width of a string using EastAsianWidth.txt, Unicode general category, Emoji specification, and other data.".freeze s.email = ["hi@ruby.consulting".freeze] s.extra_rdoc_files = ["CHANGELOG.md".freeze, "MIT-LICENSE.txt".freeze, "README.md".freeze] s.files = ["CHANGELOG.md".freeze, "MIT-LICENSE.txt".freeze, "README.md".freeze, "data/display_width.marshal.gz".freeze, "lib/unicode/display_width.rb".freeze, "lib/unicode/display_width/constants.rb".freeze, "lib/unicode/display_width/emoji_support.rb".freeze, "lib/unicode/display_width/index.rb".freeze, "lib/unicode/display_width/no_string_ext.rb".freeze, "lib/unicode/display_width/reline_ext.rb".freeze, "lib/unicode/display_width/string_ext.rb".freeze] s.homepage = "https://github.com/janlelis/unicode-display_width".freeze s.licenses = ["MIT".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.5.0".freeze) s.rubygems_version = "3.5.21".freeze s.summary = "Determines the monospace display width of a string in Ruby.".freeze s.specification_version = 4 s.add_development_dependency(%q.freeze, ["~> 13.0".freeze]) s.add_development_dependency(%q.freeze, ["~> 3.4".freeze]) s.add_runtime_dependency(%q.freeze, ["~> 4.1".freeze]) end unicode-display_width-3.2.0/MIT-LICENSE.txt0000644000004100000410000000207115061663675020325 0ustar www-datawww-dataThe MIT LICENSE Copyright (c) 2011, 2015-2024 Jan Lelis Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. unicode-display_width-3.2.0/README.md0000644000004100000410000002645715061663675017350 0ustar www-datawww-data# Unicode::DisplayWidth [![[version]](https://badge.fury.io/rb/unicode-display_width.svg)](https://badge.fury.io/rb/unicode-display_width) [](https://github.com/janlelis/unicode-display_width/actions?query=workflow%3ATest) Determines the monospace display width of a string in Ruby, which is useful for all kinds of terminal-based applications. The implementation is based on [EastAsianWidth.txt](https://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt), the [Emoji specfication](https://www.unicode.org/reports/tr51/) and other data, 100% in Ruby. It does not rely on the OS vendor ([wcwidth](https://github.com/janlelis/wcswidth-ruby)) to provide an up-to-date method for measuring string width in terminals. Unicode version: **17.0.0** (September 2025) ## Gem Version 3 — Improved Emoji Support **Emoji support is now enabled by default.** See below for description and configuration possibilities. **Unicode::DisplayWidth.of now takes keyword arguments:** { ambiguous:, emoji:, overwrite: } See [CHANGELOG](/CHANGELOG.md) for details. ## Gem Version 2.4.2 — Performance Updates **If you use this gem, you should really upgrade to 2.4.2 or newer. It's often 100x faster, sometimes even 1000x and more!** This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when the string involves full-width or ambiguous characters, the gem is much faster now. ## Introduction to Character Widths Guessing the correct space a character will consume on terminals is not easy. There is no single standard. Most implementations combine data from [East Asian Width](https://www.unicode.org/reports/tr11/), some [General Categories](https://en.wikipedia.org/wiki/Unicode_character_property#General_Category), and hand-picked adjustments. ### How this Library Handles Widths Further at the top means higher precedence. Please expect changes to this algorithm with every MINOR version update (the X in 1.X.0)! Width | Characters | Comment -------|------------------------------|-------------------------------------------------- ? | (user defined) | Overwrites any other values ? | Emoji | See "How this Library Handles Emoji Width" below -1 | `"\b"` | Backspace (total width never below 0) 0 | `"\0"`, `"\x05"`, `"\a"`, `"\n"`, `"\v"`, `"\f"`, `"\r"`, `"\x0E"`, `"\x0F"` | [C0 control codes](https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C0_.28ASCII_and_derivatives.29) which do not change horizontal width 1 | `"\u{00AD}"` | SOFT HYPHEN 2 | `"\u{2E3A}"` | TWO-EM DASH 3 | `"\u{2E3B}"` | THREE-EM DASH 0 | General Categories: Mn, Me, Zl, Zp, Cf (non-arabic)| Excludes ARABIC format characters 0 | Derived Property: Default_Ignorable_Code_Point | Ignorable ranges 0 | `"\u{1160}".."\u{11FF}"`, `"\u{D7B0}".."\u{D7FF}"` | HANGUL JUNGSEONG 2 | East Asian Width: F, W | Full-width characters 2 | `"\u{3400}".."\u{4DBF}"`, `"\u{4E00}".."\u{9FFF}"`, `"\u{F900}".."\u{FAFF}"`, `"\u{20000}".."\u{2FFFD}"`, `"\u{30000}".."\u{3FFFD}"` | Full-width ranges 1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1 1 | All other codepoints | - ## Install Install the gem with: $ gem install unicode-display_width Or add to your Gemfile: gem 'unicode-display_width' ## Usage ```ruby require 'unicode/display_width' Unicode::DisplayWidth.of("⚀") # => 1 Unicode::DisplayWidth.of("一") # => 2 ``` ### Ambiguous Characters The second parameter defines the value returned by characters defined as ambiguous: ```ruby Unicode::DisplayWidth.of("·", 1) # => 1 Unicode::DisplayWidth.of("·", 2) # => 2 ``` ### Encoding Notes - Data with *BINARY* encoding is interpreted as UTF-8, if possible - Non-UTF-8 strings are converted to UTF-8 before measuring, using the [`{invalid: :replace, undef: :replace}`) options](https://ruby-doc.org/3.3.5/encodings_rdoc.html#label-Encoding+Options) ### Custom Overwrites You can overwrite how to handle specific code points by passing a hash (or even a proc) as `overwrite:` parameter: ```ruby Unicode::DisplayWidth.of("a\tb", 1, overwrite: { "\t".ord => 10 })) # => TAB counted as 10, result is 12 ``` Please note that using overwrites disables some perfomance optimizations of this gem. ### Emoji If your terminal supports it, the gem detects Emoji and Emoji sequences and adjusts the width of the measured string. This can be disabled by passing `emoji: false` as an argument: ```ruby Unicode::DisplayWidth.of "🤾🏽‍♀️", emoji: :all # => 2 Unicode::DisplayWidth.of "🤾🏽‍♀️", emoji: false # => 5 ``` #### How this Library Handles Emoji Width There are many Emoji which get constructed by combining other Emoji in a sequence. This makes measuring the width complicated, since terminals might either display the combined Emoji or the separate parts of the Emoji individually. Another aspect where terminals disagree is whether Emoji characters which have a text presentation by default (width 1) should be turned into full-width (width 2) when combined with Variation Selector 16 (*U+FEOF*). Finally, it varies if Skin Tone Modifiers can be applied to all characters or just to those with the "Emoji Base" property. Emoji Type | Width / Comment ------------|---------------- Basic/Single Emoji character without Variation Selector | No special handling Basic/Single Emoji character with VS15 (Text) | No special handling Basic/Single Emoji character with VS16 (Emoji) | 2 or East Asian Width (see table below) Single Emoji character with Skin Tone Modifier | 2 unless Emoji mode is `:none` or `vs16` Skin Tone Modifier used in isolation or with invalid base | 2 if Emoji mode is `:rgi` / `:rgi_at` Emoji Sequence | 2 if Emoji belongs to configured Emoji set (see table below) #### Emoji Modes The `emoji:` option can be used to configure which type of Emoji should be considered to have a width of 2 and if VS16-Emoji should be widened. Other sequences are treated as non-combined Emoji, so the widths of all partial Emoji add up (e.g. width of one basic Emoji + one skin tone modifier + another basic Emoji). The following Emoji settings can be used: `emoji:` Option | VS16-Emoji Width | Emoji Sequences Width / Comment | Example Terminals ----------------|------------------|---------------------------------|------------------ `true` or `:auto` | - | Automatically use recommended Emoji setting for your terminal | - `:all` | 2 | 2 for all ZWJ/modifier/keycap sequences, even if they are not well-formed Emoji sequences | iTerm, foot `:all_no_vs16` | EAW (1 or 2) | 2 for all ZWJ/modifier/keycap sequences, even if they are not well-formed Emoji sequences | WezTerm `:possible`| 2 | 2 for all possible/well-formed Emoji sequences | ? `:rgi` | 2 | 2 for all [RGI Emoji](https://www.unicode.org/reports/tr51/#def_rgi_set) sequences | ? `:rgi_at` | EAW (1 or 2) | 1 or 2: Like `:rgi`, but Emoji sequences starting with a default-text Emoji have EAW | Apple Terminal `:vs16` | 2 | 2 * number of partial Emoji (sequences never considered to represent a combined Emoji) | kitty? `false` or `:none` | EAW (1 or 2) | No Emoji adjustments | gnome-terminal, many older terminals - *EAW:* East Asian Width - *RGI Emoji:* Emoji Recommended for General Interchange - *ZWJ:* Zero-width Joiner: Codepoint `U+200D`,used in many Emoji sequences #### Emoji Support in Terminals Unfortunately, the level of Emoji support varies a lot between terminals. While some of them are able to display (almost) all Emoji sequences correctly, others fall back to displaying sequences of basic Emoji. When `emoji: true` or `emoji: :auto` is used, the gem will attempt to set the best fitting Emoji setting for you (e.g. `:rgi_at` on "Apple_Terminal" or `false` on Gnome's terminal widget). Please note that Emoji display and number of terminal columns used might differs a lot. For example, it might be the case that a terminal does not understand which Emoji to display, but still manages to calculate the proper amount of terminal cells. The automatic Emoji support level per terminal only considers the latter (cursor position), not the actual Emoji image(s) displayed. Please [open an issue](https://github.com/janlelis/unicode-display_width/issues/new) if you notice your terminal application could use a better default value. Also see the [ucs-detect project](https://ucs-detect.readthedocs.io/results.html), which is a great resource that compares various terminal's Unicode/Emoji capabilities. You can visually check how your terminals renders different kind of Emoji types with the [terminal-emoji-width.rb script](https://github.com/janlelis/unicode-display_width/blob/main/misc/terminal-emoji-width.rb). **To terminal implementors reading this:** Although the practice of giving all Emoji/ZWJ sequences a width of 2 (`:all` mode described above) has some advantages, it does not lead to a particularly good developer experience. Since there is always the possibility of well-formed Emoji that are currently not supported (non-RGI / future Unicode) appearing, those sequences will take more cells. Instead of overflowing, cutting off sequences or displaying placeholder-Emoji, could it be worthwile to implement the `:rgi` option (only known Emoji get width 2) and give those unknown Emoji the space they need? This would support the idea that the meaning of an unknown Emoji sequence can still be conveyed (without messing up the terminal at the same time). Just a thought… ### Usage with String Extension ```ruby require 'unicode/display_width/string_ext' "⚀".display_width # => 1 '一'.display_width # => 2 ``` ### Usage with Config Object You can use a config object that allows you to save your configuration for later-reuse. This requires an extra line of code, but has the advantage that you'll need to define your string-width options only once: ```ruby require 'unicode/display_width' display_width = Unicode::DisplayWidth.new( # ambiguous: 1, overwrite: { "A".ord => 100 }, emoji: :all, ) display_width.of "⚀" # => 1 display_width.of "🤠‍🤢" # => 2 display_width.of "A" # => 100 ``` ### Usage from the Command-Line Use this one-liner to print out display widths for strings from the command-line: ``` $ gem install unicode-display_width $ ruby -r unicode/display_width -e 'puts Unicode::DisplayWidth.of $*[0]' -- "一" ``` Replace "一" with the actual string to measure ## Other Implementations & Discussion - Python: https://github.com/jquast/wcwidth - JavaScript: https://github.com/mycoboco/wcwidth.js - C: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - C for Julia: https://github.com/JuliaLang/utf8proc/issues/2 - Golang: https://github.com/rivo/uniseg See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries. ## Copyright & Info - Copyright (c) 2011, 2015-2025 Jan Lelis, https://janlelis.com, released under the MIT license - Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run - Unicode data: https://www.unicode.org/copyright.html#Exhibit1 unicode-display_width-3.2.0/CHANGELOG.md0000644000004100000410000001652215061663675017672 0ustar www-datawww-data# CHANGELOG ## 3.2.0 - Unicode 17.0 ## 3.1.5 - Cache Emoji support level for performance reasons #30, patch by @Earlopain: ## 3.1.4 - Fix that skin tone modifiers were ignored when used in a non-ZWJ sequence context (= single emoji char + modifier) #29 - Add more docs and specs about modifier handling ## 3.1.3 Better handling of non-UTF-8 strings, patch by @Earlopain: - Data with *BINARY* encoding is interpreted as UTF-8, if possible - Use `invalid: :replace` and `undef: :replace` options when converting to UTF-8 ## 3.1.2 - Performance improvements ## 3.1.1 - Performance improvements ## 3.1.0 **Improve Emoji support:** - Emoji modes: Differentiate between well-formed Emoji (`:possible`) and any ZWJ/modifier sequence (`:all`). The latter is more common and more efficient to implement. - Unify `:rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to the former `:rgi_uqe` option). Most terminals that want to support the RGI set will probably want to catch Emoji sequences with missing VS16s. - Add new `:all_no_vs16` and `:rgi_at` modes to be able to support some terminals that needs these quirks - Add alias `emoji: :auto` for `emoji: true` and `emoji: :none` for `emoji: false` - `:auto` mode: Only consider terminal cells when recommending Emoji support level (Emoji themselves might display differently) - `:auto` mode: Set default Emoji mode for unknown/unsupported terminals to `:none` - Rename `:basic` mode to `:vs16` ## 3.0.1 - Add WezTerm and foot as good Emoji terminals ## 3.0.0 **Rework Emoji support:** - Emoji widths are now enabled by default - Only reduce Emoji width to 2 when RGI Emoji detected (configurable) - VS16 turns Emoji characters of width 1 into full-width - Please note that Emoji parsing has a notable impact on performance. You can use the `emoji: false` option to disable Emoji adjustments - Tries to detect terminal's Emoji support level automatically (from ENV vars) **Index fixes and updates:** - Private-use characters are considered ambiguous (were given width 1 before) - Fix that a few zero-width ignorable codepoints from recent Unicode were missing - Consider the following separators to be zero-width: - U+2028 - LINE SEPARATOR - Zl - U+2029 - PARAGRAPH SEPARATOR - Zp **Other:** - Add keyword arguments to `Unicode::DisplayWidth.of`. If you are using a hash with overwrite values as third parameter, be sure to put it in curly braces. - Using third parameter or explicit hash as fourth parameter is deprecated, please migrate to the keyword arguments API - Gem raises `ArgumentError` for ambiguous values other than 1 or 2 - Performance optimizations - Require Ruby 2.5 ## 2.6.0 - Unicode 16 ## 2.5.0 - Unicode 15.1 ## 2.4.2 More performance improvements: - Optimize lookup of first 4096 codepoints - Avoid overwrite lookup if no overwrites are set ## 2.4.1 - Improve general performance! - Further improve performance for ASCII strings *You should really upgrade - it's much faster now!* ## 2.4.0 - Improve performance for ASCII-only strings, by @fatkodima - Require Ruby 2.4 ## 2.3.0 - Unicode 15.0 ## 2.2.0 - Add *Hangul Jamo Extended-B* block to zero-width chars, thanks @ninjalj #22 ## 2.1.0 - Unicode 14.0 ## 2.0.0 Add Support for Ruby 3.0 ### Breaking Changes Some features of this library were marked deprecated for a long time and have been removed with Version 2.0: - Aliases of display\_width (…\_size, …\_length) have been removed - Auto-loading of string core extension has been removed: If you are relying on the `String#display_width` string extension to be automatically loaded (old behavior), please load it explicitly now: ```ruby require "unicode/display_width/string_ext" ``` You could also change your `Gemfile` line to achieve this: ```ruby gem "unicode-display_width", require: "unicode/display_width/string_ext" ``` ## 2.0.0.pre2 - Update 2.0 branch to Unicode 13 ## 2.0.0.pre1 Will be published as non-pre version on rubygems.org when Ruby 3.0 is released (December 2020) - Introduce new class-based API, which remembers your string-width configuration. See README for details. - Remove auto-loading of string extension - You can: `require "unicode/display_width/string_ext"` to continue to use the string extension - The manual opt-out `require "unicode/display_width/no_string_ext"` is not needed anymore and will issue a warning in the future - Remove (already deprecated) String#display_size and String#display_width aliases Refactorings / Internal Changes: - Freeze string literals - The Unicode::DisplayWidth now is class, instead of a module, this enables the new config-object API ## 1.8.0 - Unicode 14.0 (last release of 1.x) ## 1.7.0 - Unicode 13 ## 1.6.1 - Fix that ambiguous and overwrite options where ignored for emoji-measuring ## 1.6.0 - Unicode 12.1 ## 1.5.0 - Unicode 12 ## 1.4.1 - Only bundle required lib/* and data/* files in actual rubygem, patch by @tas50 ## 1.4.0 - Unicode 11 ## 1.3.3 - Replace Gem::Util.gunzip with direct zlib implementation This removes the dependency on rubygems, fixes #17 ## 1.3.2 - Explicitly load rubygems/util, fixes regression in 1.3.1 (autoload issue) ## 1.3.1 - Use `Gem::Util` for `gunzip`, removes deprecation warning, patch by @Schwad ## 1.3.0 - Unicode 10 ## 1.2.1 - Fix bug that `emoji: true` would fail for emoji without modifier ## 1.2.0 - Add zero-width codepoint ranges: U+2060..U+206F, U+FFF0..U+FFF8, U+E0000..U+E0FFF - Add full-witdh codepoint ranges: U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2FFFD, U+30000..U+3FFFD - Experimental emoji support using the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem - Fix minor bug in index compression scheme ## 1.1.3 - Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny ## 1.1.2 - Reduce memory consumption and increase performance, patch by @rrosenblum ## 1.1.1 - Always load index into memory, fixes #9 ## 1.1.0 - Support Unicode 9.0 ## 1.0.5 - Actually include new index from 1.0.4 ## 1.0.4 - New index format (much smaller) and internal API changes - Move index generation to a builder plugin for the unicoder gem - No public API changes ## 1.0.3 - Avoid circular dependency warning ## 1.0.2 - Fix error that gemspec might be invalid under some circumstances (see gh#6) ## 1.0.1 - Inofficially allow Ruby 1.9 ## 1.0.0 - Faster than 0.3.1 - Advanced determination of character width - This includes: Treat width of most chars of general categories (Mn, Me, Cf) as 0 - This includes: Introduce list of characters with special widths - Allow custom overrides for specific codepoints - Set required Ruby version to 2.0 - Add NO_STRING_EXT mode to disable monkey patching - Internal API & index format changed drastically - Remove require 'unicode/display_size' (use 'unicode/display_width' instead) ## 0.3.1 - Faster than 0.3.0 - Deprecate usage of aliases: String#display_size and String#display_length - Eliminate Ruby warnings (@amatsuda) ## 0.3.0 - Update EastAsianWidth from 7.0 to 8.0 - Add rake task to update EastAsianWidth.txt - Move code to generate index from library to Rakefile - Update project's meta files - Deprecate requiring 'unicode-display_size' ## 0.2.0 - Update EastAsianWidth from 6.0 to 7.0 - Don't build index table automatically when not available - Don't include EastAsianWidth.txt in gem (only index) ## 0.1.0 - Fix github issue #1 ## 0.1.0 - Initial release