pax_global_header00006660000000000000000000000064151506077100014514gustar00rootroot0000000000000052 comment=12ffa8c10a549460a5a61de8b20cd94cd5b1bd64 golang-github-clipperhouse-displaywidth-0.11.0+ds/000077500000000000000000000000001515060771000221275ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/.github/000077500000000000000000000000001515060771000234675ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/.github/workflows/000077500000000000000000000000001515060771000255245ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/.github/workflows/gofuzz.yml000066400000000000000000000007711515060771000276000ustar00rootroot00000000000000name: Fuzz on: push: branches: [ main ] pull_request: branches: [ main ] jobs: all: runs-on: ubuntu-latest strategy: matrix: fuzzer: [FuzzBytesAndString, FuzzRune, FuzzTruncateStringAndBytes, FuzzControlSequences] steps: - name: Check out code uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: cache: true - name: ${{ matrix.fuzzer }} run: go test -fuzz=${{ matrix.fuzzer }} -fuzztime=60s . golang-github-clipperhouse-displaywidth-0.11.0+ds/.github/workflows/gotest.yml000066400000000000000000000007461515060771000275630ustar00rootroot00000000000000name: Test on: push: branches: [ main ] pull_request: branches: [ main ] jobs: all: runs-on: ubuntu-latest strategy: matrix: go-version: ['1.20', '1.21', '1.22', '1.23', '1.24', '1.25'] steps: - name: Check out code uses: actions/checkout@v6 - name: Set up Go uses: actions/setup-go@v6 with: go-version: ${{ matrix.go-version }} cache: true - name: Run test run: go test ./... 
-race -short golang-github-clipperhouse-displaywidth-0.11.0+ds/.gitignore000066400000000000000000000000271515060771000241160ustar00rootroot00000000000000.DS_Store *.out *.test golang-github-clipperhouse-displaywidth-0.11.0+ds/AGENTS.md000066400000000000000000000045461515060771000234430ustar00rootroot00000000000000The goals and overview of this package can be found in the README.md file, start by reading that. The goal of this package is to determine the display (column) width of a string, UTF-8 bytes, or runes, as would happen in a monospace font, especially in a terminal. When troubleshooting, write Go unit tests instead of executing debug scripts. The tests can return whatever logs or output you need. If those tests are only for temporary troubleshooting, clean up the tests after the debugging is done. (Separate executable debugging scripts are messy, tend to have conflicting dependencies and are hard to cleanup.) If you make changes to the trie generation in internal/gen, it can be invoked by running `go generate` from the top package directory. ## Pull Requests and branches For PRs (pull requests), you can use the gh CLI tool. Compare the current branch with main. Reviewing a PR and reviewing a branch are about the same, but the PR may add context. Understand the goals of the PR. Note any API changes, especially breaking changes. Look for thoroughness of tests, as well as GoDoc comments. Retrieve and consider the comments on the PR, which may have come from GitHub Copilot or Cursor BugBot. Think like GitHub Copilot or Cursor BugBot. Offer to optionally post a brief summary of the review to the PR, via the gh CLI tool. ## Tagged Go releases If I ask you whether we are ready to release, this means a tagged Go release on the main branch. Go releases are git tagged with a version number. Review the changes since the last release, i.e. the previous git tag. Ensure that the changes are complete and correct. 
Identify new features, bug fixes, and performance improvements. Identify breaking changes, especially API changes. Ensure good test coverage. Look for performance changes, especially performance regressions, by running benchmarks against the previous release. Ensure that the documentation in READMEs and GoDocs are complete, correct and consistent. ## Comparisons to go-runewidth We originally attempted to make this package compatible with go-runewidth. However, we found that there were too many differences in the handling of certain characters and properties. We believe, preliminarily, that our choices are more correct and complete, by using more complete categories such as Unicode Cf (format) for zero-width and Mn (Nonspacing_Mark) for combining marks. golang-github-clipperhouse-displaywidth-0.11.0+ds/CHANGELOG.md000066400000000000000000000077151515060771000237520ustar00rootroot00000000000000# Changelog ## [0.11.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.10.0...v0.11.0) ### Added - New `ControlSequences8Bit` option to treat 8-bit ECMA-48 (C1) escape sequences as zero-width. (#22) ### Changed - Upgraded uax29 dependency to v2.7.0 for 8-bit escape sequence support in the grapheme iterator. - Truncation now validates that preserved trailing escape sequences are zero-width, preventing edge cases where non-zero-width sequences could leak into output. ### Note - `ControlSequences8Bit` is deliberately ignored by `TruncateString` and `TruncateBytes`, because C1 byte values (0x80–0x9F) overlap with UTF-8 multi-byte encoding. ## [0.10.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.9.0...v0.10.0) ### Added - New `ControlSequences` option to treat ECMA-48/ANSI escape sequences as zero-width. (#20) - `TruncateString` and `TruncateBytes` now preserve trailing ANSI escape sequences (such as SGR resets) when `ControlSequences` is true, preventing color bleed in terminal output. 
### Changed - Removed `stringish` dependency; generic type constraints are now inline `~string | []byte`. - Upgraded uax29 dependency to v2.6.0 for ANSI escape sequence support in the grapheme iterator. ## [0.9.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.8.0...v0.9.0) ### Changed - Unicode 17 support: East Asian Width and emoji data updated to Unicode 17.0.0. (#18) - Upgraded uax29 dependency to v2.5.0 (Unicode 17 grapheme segmentation). ## [0.8.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.7.0...v0.8.0) ### Changed - Performance: ASCII fast path that applies to any run of printable ASCII. 2x-10x faster for ASCII text vs v0.7.0. (#16) - Upgraded uax29 dependency to v2.4.0 for Unicode 16 support. Text that includes Indic_Conjunct_Break may segment differently (and more correctly). (#15) ## [0.7.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.6.2...v0.7.0) ### Added - New `TruncateString` and `TruncateBytes` methods to truncate strings to a maximum display width, with optional tail (like an ellipsis). (#13) ## [0.6.2] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.6.1...v0.6.2) ### Changed - Internal: reduced property categories for simpler trie. ## [0.6.1] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.6.0...v0.6.1) ### Changed - Perf improvements: replaced the ASCII lookup table with a simple function. A bit more cache-friendly. More inlining. - Bug fix: single regional indicators are now treated as width 2, since that is what actual terminals do. ## [0.6.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.5.0...v0.6.0) ### Added - New `StringGraphemes` and `BytesGraphemes` methods, for iterating over the widths of grapheme clusters. 
### Changed - Fast ASCII lookups ## [0.5.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.4.1...v0.5.0) ### Added - Unicode 16 support - Improved emoji presentation handling per Unicode TR51 ### Changed - Corrected VS15 (U+FE0E) handling: now preserves base character width (no-op) per Unicode TR51 - Performance optimizations: reduced property lookups ### Fixed - VS15 variation selector now correctly preserves base character width instead of forcing width 1 ## [0.4.1] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.4.0...v0.4.1) ### Changed - Updated uax29 dependency - Improved flag handling ## [0.4.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.3.1...v0.4.0) ### Added - Support for variation selectors (VS15, VS16) and regional indicator pairs (flags) ## [0.3.1] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.3.0...v0.3.1) ### Added - Fuzz testing support ### Changed - Updated stringish dependency ## [0.3.0] [Compare](https://github.com/clipperhouse/displaywidth/compare/v0.2.0...v0.3.0) ### Changed - Dropped compatibility with go-runewidth - Trie implementation cleanup golang-github-clipperhouse-displaywidth-0.11.0+ds/LICENSE000066400000000000000000000020551515060771000231360ustar00rootroot00000000000000MIT License Copyright (c) 2025 Matt Sherman Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. golang-github-clipperhouse-displaywidth-0.11.0+ds/README.md000066400000000000000000000173121515060771000234120ustar00rootroot00000000000000# displaywidth A high-performance Go package for measuring the monospace display width of strings, UTF-8 bytes, and runes. [![Documentation](https://pkg.go.dev/badge/github.com/clipperhouse/displaywidth.svg)](https://pkg.go.dev/github.com/clipperhouse/displaywidth) [![Test](https://github.com/clipperhouse/displaywidth/actions/workflows/gotest.yml/badge.svg)](https://github.com/clipperhouse/displaywidth/actions/workflows/gotest.yml) [![Fuzz](https://github.com/clipperhouse/displaywidth/actions/workflows/gofuzz.yml/badge.svg)](https://github.com/clipperhouse/displaywidth/actions/workflows/gofuzz.yml) ## Install ```bash go get github.com/clipperhouse/displaywidth ``` ## Usage ```go package main import ( "fmt" "github.com/clipperhouse/displaywidth" ) func main() { width := displaywidth.String("Hello, 世界!") fmt.Println(width) width = displaywidth.Bytes([]byte("ðŸŒ")) fmt.Println(width) width = displaywidth.Rune('ðŸŒ') fmt.Println(width) } ``` For most purposes, you should use the `String` or `Bytes` methods. They sum the widths of grapheme clusters in the string or byte slice. > Note: in your application, iterating over runes to measure width is likely incorrect; the smallest unit of display is a grapheme, not a rune. 
### Iterating over graphemes If you need the individual graphemes: ```go import ( "fmt" "github.com/clipperhouse/displaywidth" ) func main() { g := displaywidth.StringGraphemes("Hello, 世界!") for g.Next() { width := g.Width() value := g.Value() // do something with the width or value } } ``` ### Options Create the options you need, and then use methods on the options struct. ```go var myOptions = displaywidth.Options{ EastAsianWidth: true, ControlSequences: true, } width := myOptions.String("Hello, 世界!") ``` #### ControlSequences `ControlSequences` specifies whether to ignore ECMA-48 escape sequences when calculating the display width. When `false` (default), ANSI escape sequences are treated as just a series of characters. When `true`, they are treated as a single zero-width unit. #### ControlSequences8Bit `ControlSequences8Bit` specifies whether to ignore 8-bit ECMA-48 escape sequences when calculating the display width. When `false` (default), these are treated as just a series of characters. When `true`, they are treated as a single zero-width unit. Note: this option is ignored by the `Truncate` methods, as the concatenation can lead to unintended UTF-8 semantics. #### EastAsianWidth `EastAsianWidth` defines how [East Asian Ambiguous characters](https://www.unicode.org/reports/tr11/#Ambiguous) are treated. When `false` (default), East Asian Ambiguous characters are treated as width 1. When `true`, they are treated as width 2. You may wish to configure this based on environment variables or locale. `go-runewidth`, for example, does so [during package initialization](https://github.com/mattn/go-runewidth/blob/master/runewidth.go#L26C1-L45C2). `displaywidth` does not do this automatically, we prefer to leave it to you. 
## Technical standards and compatibility This package implements the Unicode East Asian Width standard ([UAX #11](https://www.unicode.org/reports/tr11/tr11-43.html)), and handles [variation selectors](https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)), and [regional indicator pairs](https://en.wikipedia.org/wiki/Regional_indicator_symbol) (flags). We implement [Unicode TR51](https://www.unicode.org/reports/tr51/tr51-27.html) for emojis. We are keeping an eye on [emerging standards](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/). For control sequences, we implement the [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) standard for 7-bit and 8-bit control sequences. `clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg` will give the same outputs for most real-world text. Extensive details are in the [compatibility analysis](comparison/COMPATIBILITY_ANALYSIS.md). ## Invalid UTF-8 This package does not validate UTF-8. If you pass invalid UTF-8, the results are undefined. We fuzz against invalid UTF-8 to ensure we don't panic or loop indefinitely. The `ControlSequences8Bit` option means that we will segment valid 8-bit control sequences, which are typically _not_ valid UTF-8. 8-bit control bytes happen to also be UTF-8 continuation bytes. Use with caution. ## Prior Art [mattn/go-runewidth](https://github.com/mattn/go-runewidth) [rivo/uniseg](https://github.com/rivo/uniseg) [x/text/width](https://pkg.go.dev/golang.org/x/text/width) [x/text/internal/triegen](https://pkg.go.dev/golang.org/x/text/internal/triegen) ## Benchmarks ```bash cd comparison go test -bench=. 
-benchmem ``` ``` goos: darwin goarch: arm64 pkg: github.com/clipperhouse/displaywidth/comparison cpu: Apple M2 BenchmarkString_Mixed/clipperhouse/displaywidth-8 5784 ns/op 291.69 MB/s 0 B/op 0 allocs/op BenchmarkString_Mixed/mattn/go-runewidth-8 14751 ns/op 114.36 MB/s 0 B/op 0 allocs/op BenchmarkString_Mixed/rivo/uniseg-8 19360 ns/op 87.14 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/clipperhouse/displaywidth-8 54.60 ns/op 2344.32 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/mattn/go-runewidth-8 1195 ns/op 107.08 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/rivo/uniseg-8 1578 ns/op 81.13 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/clipperhouse/displaywidth-8 5837 ns/op 289.01 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/mattn/go-runewidth-8 24418 ns/op 69.09 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/rivo/uniseg-8 19339 ns/op 87.23 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/clipperhouse/displaywidth-8 3225 ns/op 224.51 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/mattn/go-runewidth-8 4851 ns/op 149.25 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/rivo/uniseg-8 6591 ns/op 109.85 MB/s 0 B/op 0 allocs/op BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3385 ns/op 498.34 MB/s 0 B/op 0 allocs/op BenchmarkRune_Mixed/mattn/go-runewidth-8 5354 ns/op 315.07 MB/s 0 B/op 0 allocs/op BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3397 ns/op 496.56 MB/s 0 B/op 0 allocs/op BenchmarkRune_EastAsian/mattn/go-runewidth-8 15673 ns/op 107.64 MB/s 0 B/op 0 allocs/op BenchmarkRune_ASCII/clipperhouse/displaywidth-8 255.7 ns/op 500.53 MB/s 0 B/op 0 allocs/op BenchmarkRune_ASCII/mattn/go-runewidth-8 261.5 ns/op 489.55 MB/s 0 B/op 0 allocs/op BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1371 ns/op 528.22 MB/s 0 B/op 0 allocs/op BenchmarkRune_Emoji/mattn/go-runewidth-8 2267 ns/op 319.43 MB/s 0 B/op 0 allocs/op BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3229 ns/op 54.82 MB/s 192 B/op 14 allocs/op BenchmarkTruncateWithTail/mattn/go-runewidth-8 8408 
ns/op 21.05 MB/s 192 B/op 14 allocs/op BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3554 ns/op 64.43 MB/s 0 B/op 0 allocs/op BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 11189 ns/op 20.47 MB/s 0 B/op 0 allocs/op ``` Here are some notes on [how to make Unicode things fast](https://clipperhouse.com/go-unicode/). golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/000077500000000000000000000000001515060771000243015ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/COMPATIBILITY_ANALYSIS.md000066400000000000000000000127371515060771000301510ustar00rootroot00000000000000# Compatibility Analysis: displaywidth, go-runewidth, and uniseg > Generated by Cursor IDE using Claude Sonnet 4.5, and edited by @clipperhouse This document summarizes the compatibility findings between three Go libraries for Unicode string width calculation: - [clipperhouse/displaywidth](https://github.com/clipperhouse/displaywidth) (this package) - [mattn/go-runewidth](https://github.com/mattn/go-runewidth) - [rivo/uniseg](https://github.com/rivo/uniseg) ## Basic Unicode Categories Most Unicode categories show good compatibility. | Category | displaywidth | go-runewidth | uniseg | |----------|--------------|--------------|---------| | ASCII | ✅ Compatible | ✅ Compatible | ✅ Compatible | | Latin Extended | ✅ Compatible | ✅ Compatible | ✅ Compatible | | CJK (Chinese/Japanese/Korean) | ✅ Compatible | ✅ Compatible | ✅ Compatible | | Arabic | ✅ Compatible | ✅ Compatible | ✅ Compatible | | Combining Marks | ✅ Compatible | ✅ Compatible | ✅ Compatible | | Zero-Width Characters | ✅ Compatible | ✅ Compatible | ✅ Compatible | ## Emojis Regular emojis (😀, 🚀, 🎉, etc.) 
behave identically: | Library | Regular Emoji Width | |---------|---------------------| | **displaywidth** | Always 2 | | **go-runewidth** | Always 2 | | **uniseg** | Always 2 | ### Regional Indicator Pairs (Flags) Regional indicator pairs (flags like 🇺🇸) are composed of two Regional Indicator symbols. | Library | Behavior | |--------|----------| | **displaywidth** | Width 2 per flag | | **go-runewidth** | Width 1 per flag | | **uniseg** | Width 2 per flag | **Example:** `🇺🇸🇯🇵🇬🇧` (3 flags) - displaywidth: 6 columns (2+2+2) - go-runewidth: 3 columns (1+1+1) - uniseg: 6 columns (2+2+2) I (@clipperhouse) believe that 2 is the correct width, they are emojis. Ghostty and iTerm display regional flags as width 2, as does VS Code. Mac Terminal (Tahoe macOS 26) displays them as 1. Sigh. To repro in your terminal of choice: ``` echo "🇺🇸🇯🇵🇬🇧abc\n123456" ``` I have considered detecting the terminal (like a user agent) and using width 1 for Mac Terminal as a special case. I kinda hate that, because if Apple corrects it, then the behavior changes. OTOH, I assume Mac Terminal is the most popular terminal and so it might be better for end-users. ## Variation Selectors VS15 and VS16 from [Unicode TR51](https://unicode.org/reports/tr51/#Emoji_Variation_Sequences) | Library | VS16 (U+FE0F) | VS15 (U+FE0E) | |---------|---------------|---------------| | **displaywidth** | Forces emoji presentation (width 2) | No effect, preserves base width | | **go-runewidth** | Treated as separate character (width 1) | Treated as separate character (width 1) | | **uniseg** | Treated as part of emoji (width 2) | Forces width 1 | **Example:** `☺️⌛︎❤️` (3 emoji with variation selectors) - displaywidth: 6 columns - go-runewidth: 4 columns - uniseg: 5 columns It would appear to me (@clipperhouse) that the handling of VS15 is not widely agreed upon. Some libraries and standards (such as wcwidth) interpret it as "always narrow to width 1". 
Others (such as this library) interpret it as "no effect on width, use the base character width". Here is [a conversation on GitHub](https://github.com/contour-terminal/contour/discussions/1178#discussioncomment-6778716) and an [explanation from Grok](https://grok.com/share/bGVnYWN5LWNvcHk%3D_274f540c-c9a6-47c7-9d4f-47697ed20032). ## Keycap Sequences Keycap sequences like 1️⃣ are formed by: base character + variation selector (U+FE0F) + combining enclosing keycap (U+20E3). | Library | Behavior | Width per Keycap | |---------|----------|------------------| | **displaywidth** | Treats as emoji | 2 columns | | **go-runewidth** | Treats base character | 1 column | | **uniseg** | Treats as base character | 1 column | **Example:** `1️⃣#️⃣` (2 keycap sequences) - displaywidth: 4 columns (2 per keycap) - go-runewidth: 2 columns (1 per keycap) - uniseg: 2 columns (1 per keycap) ## East Asian Ambiguous Width [East Asian Ambiguous characters](https://www.unicode.org/reports/tr11/#Ambiguous) (★, °, ±, etc.) can be rendered as either narrow (1 column) or wide (2 columns) depending on configuration. | Library | Default | With EastAsianWidth=true | |---------|---------|--------------------------| | **displaywidth** | Width 1 | Width 2 | | **go-runewidth** | Width 1 | Width 2 | | **uniseg** | Width 1 | Width 2 | **Example:** `★°±` (3 ambiguous characters) - displaywidth default: 3 columns - displaywidth with EastAsianWidth=true: 6 columns - go-runewidth default: 3 columns - go-runewidth with EastAsianWidth=true: 6 columns - uniseg default: 3 columns - uniseg with EastAsianAmbiguousWidth=2: 5 columns (usually) ## Detailed Test Results > Run `go test -v` in the `comparison/` directory to see comprehensive behavior comparisons between libraries. ### Test Case: "Hello 世界! 😀🇺🇸" **Breakdown:** - "Hello " = 6 columns (ASCII) - "世界" = 4 columns (CJK, 2 each) - "! 
" = 2 columns (ASCII) - "😀" = 2 columns (emoji) - "🇺🇸" = 1-2 columns (flag, depends on library) **Results:** - displaywidth: 16 columns (flag = 2 columns) - go-runewidth: 15 columns (flag = 1 column) - uniseg: 16 columns (flag = 2 columns) ### Test Case: "🚀🚀🚀" (3 rocket emoji) **Results:** - displaywidth: 6 columns (2 per emoji) - go-runewidth: 6 columns (2 per emoji) - uniseg: 6 columns (2 per emoji) ## Further Reading - [State of Terminal Emulation 2025](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/) golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/README.md000066400000000000000000000064331515060771000255660ustar00rootroot00000000000000## Compatibility In real-world text, you should see the same outputs from `clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg`. The tests in this `comparison` package exercise the behaviors of the three libraries. Extensive details are available in the [compatibility analysis](COMPATIBILITY_ANALYSIS.md). ## Benchmarks ```bash go test -bench=. 
-benchmem ``` ``` goos: darwin goarch: arm64 pkg: github.com/clipperhouse/displaywidth/comparison cpu: Apple M2 BenchmarkString_Mixed/clipperhouse/displaywidth-8 5784 ns/op 291.69 MB/s 0 B/op 0 allocs/op BenchmarkString_Mixed/mattn/go-runewidth-8 14751 ns/op 114.36 MB/s 0 B/op 0 allocs/op BenchmarkString_Mixed/rivo/uniseg-8 19360 ns/op 87.14 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/clipperhouse/displaywidth-8 54.60 ns/op 2344.32 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/mattn/go-runewidth-8 1195 ns/op 107.08 MB/s 0 B/op 0 allocs/op BenchmarkString_ASCII/rivo/uniseg-8 1578 ns/op 81.13 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/clipperhouse/displaywidth-8 5837 ns/op 289.01 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/mattn/go-runewidth-8 24418 ns/op 69.09 MB/s 0 B/op 0 allocs/op BenchmarkString_EastAsian/rivo/uniseg-8 19339 ns/op 87.23 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/clipperhouse/displaywidth-8 3225 ns/op 224.51 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/mattn/go-runewidth-8 4851 ns/op 149.25 MB/s 0 B/op 0 allocs/op BenchmarkString_Emoji/rivo/uniseg-8 6591 ns/op 109.85 MB/s 0 B/op 0 allocs/op BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3385 ns/op 498.34 MB/s 0 B/op 0 allocs/op BenchmarkRune_Mixed/mattn/go-runewidth-8 5354 ns/op 315.07 MB/s 0 B/op 0 allocs/op BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3397 ns/op 496.56 MB/s 0 B/op 0 allocs/op BenchmarkRune_EastAsian/mattn/go-runewidth-8 15673 ns/op 107.64 MB/s 0 B/op 0 allocs/op BenchmarkRune_ASCII/clipperhouse/displaywidth-8 255.7 ns/op 500.53 MB/s 0 B/op 0 allocs/op BenchmarkRune_ASCII/mattn/go-runewidth-8 261.5 ns/op 489.55 MB/s 0 B/op 0 allocs/op BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1371 ns/op 528.22 MB/s 0 B/op 0 allocs/op BenchmarkRune_Emoji/mattn/go-runewidth-8 2267 ns/op 319.43 MB/s 0 B/op 0 allocs/op BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3229 ns/op 54.82 MB/s 192 B/op 14 allocs/op BenchmarkTruncateWithTail/mattn/go-runewidth-8 8408 
ns/op 21.05 MB/s 192 B/op 14 allocs/op BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3554 ns/op 64.43 MB/s 0 B/op 0 allocs/op BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 11189 ns/op 20.47 MB/s 0 B/op 0 allocs/op ``` golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/behavior_test.go000066400000000000000000000271231515060771000274730ustar00rootroot00000000000000package comparison import ( "testing" "github.com/clipperhouse/displaywidth" "github.com/mattn/go-runewidth" "github.com/rivo/uniseg" ) func TestLibraryBehaviorComparison(t *testing.T) { testCases := []struct { name string input string expected map[string]int // library -> expected width }{ // Basic ASCII { name: "ASCII text", input: "Hello World", expected: map[string]int{ "displaywidth_default": 11, "displaywidth_options{}": 11, "go-runewidth_default": 11, "uniseg_default": 11, }, }, // East Asian characters { name: "CJK characters", input: "中文", expected: map[string]int{ "displaywidth_default": 4, "displaywidth_options{}": 4, "go-runewidth_default": 4, "uniseg_default": 4, }, }, // Ambiguous characters (width depends on EastAsianWidth) { name: "Ambiguous characters", input: "★°±", expected: map[string]int{ "displaywidth_default": 3, "displaywidth_options{}": 3, "displaywidth_EAW": 6, "go-runewidth_default": 3, "go-runewidth_EAW": 6, "uniseg_default": 3, "uniseg_EAW": 5, // uniseg behavior is different }, }, // Emoji { name: "Basic emoji", input: "😀🚀🎉", expected: map[string]int{ "displaywidth_default": 6, "displaywidth_options{}": 6, "go-runewidth_default": 6, "uniseg_default": 6, }, }, // Unicode 16.0 new emojis { name: "Unicode 16.0 emojis", input: "🫩🫆🪾🫜🪉ðŸªðŸ«Ÿ", // Face with Bags Under Eyes, Fingerprint, Leafless Tree, Root Vegetable, Harp, Shovel, Splatter expected: map[string]int{ "displaywidth_default": 14, // 2 per emoji (properly handles Unicode 16.0) "displaywidth_options{}": 14, "go-runewidth_default": 7, // go-runewidth may not fully support Unicode 16.0 yet (treats as 
width 1) "uniseg_default": 7, // uniseg may not fully support Unicode 16.0 yet (treats as width 1) }, }, // Regional Indicator Pairs (flags) - the key difference // TODO: 2 is the correct width, that's what Ghostty and iTerm do. // Sadly, Mac Terminal displays width 1. Perhaps we should special-case // it, not sure. { name: "Flags", input: "🇺🇸🇯🇵🇬🇧", expected: map[string]int{ "displaywidth_default": 6, // flags are always width 2 (modern standard) "displaywidth_options{}": 6, // same as default "go-runewidth_default": 3, // go-runewidth treats flags as width 1 "go-runewidth_strict_false": 3, "go-runewidth_strict_true": 3, // go-runewidth always returns 1 for flags "uniseg_default": 6, // uniseg treats flags as width 2 }, }, // Single Regional Indicator { name: "Single Regional Indicator", input: "🇺", expected: map[string]int{ "displaywidth_default": 2, "displaywidth_options{}": 2, "go-runewidth_default": 1, "uniseg_default": 2, }, }, // Variation selectors // â˜ºï¸ (U+263A + VS16) = width 2, ⌛︎ (U+231B + VS15) = width 2 (VS15 is no-op), â¤ï¸ (U+2764 + VS16) = width 2 { name: "Variation selectors", input: "☺ï¸âŒ›ï¸Žâ¤ï¸", expected: map[string]int{ "displaywidth_default": 6, // 2 + 2 + 2 (VS15 is no-op per Unicode TR51) "displaywidth_options{}": 6, "go-runewidth_default": 4, "uniseg_default": 5, // uniseg still treats VS15 as width 1 }, }, // Keycap sequences { name: "Keycap sequences", input: "1ï¸âƒ£#ï¸âƒ£", expected: map[string]int{ "displaywidth_default": 4, "displaywidth_options{}": 4, "go-runewidth_default": 2, "uniseg_default": 2, }, }, // Mixed content { name: "Mixed content", input: "Hello 世界! 
😀🇺🇸", expected: map[string]int{ "displaywidth_default": 16, // 6 + 4 + 2 + 2 + 2 "displaywidth_options{}": 16, // same as default "go-runewidth_default": 15, // 6 + 4 + 2 + 2 + 1 (flags are width 1) "uniseg_default": 16, // 6 + 4 + 2 + 2 + 2 }, }, // Control characters { name: "Control characters", input: "hello\nworld\t", expected: map[string]int{ "displaywidth_default": 10, // newline and tab are width 0 "displaywidth_options{}": 10, "go-runewidth_default": 10, "uniseg_default": 10, }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { // Test displaywidth with default options displaywidthDefault := displaywidth.String(tc.input) if expected, ok := tc.expected["displaywidth_default"]; ok { if displaywidthDefault != expected { t.Errorf("displaywidth.String() = %d, want %d", displaywidthDefault, expected) } } // Test displaywidth with zero-value options (should behave same as default) displaywidthZero := displaywidth.String(tc.input) if expected, ok := tc.expected["displaywidth_options{}"]; ok { if displaywidthZero != expected { t.Errorf("displaywidth.String() = %d, want %d", displaywidthZero, expected) } } // Test displaywidth with EastAsianWidth=true displaywidthEAW := displaywidth.Options{EastAsianWidth: true}.String(tc.input) if expected, ok := tc.expected["displaywidth_EAW"]; ok { if displaywidthEAW != expected { t.Errorf("displaywidth.Options{EastAsianWidth: true}.String() = %d, want %d", displaywidthEAW, expected) } } // Test go-runewidth default goRunewidthDefault := runewidth.StringWidth(tc.input) if expected, ok := tc.expected["go-runewidth_default"]; ok { if goRunewidthDefault != expected { t.Errorf("runewidth.StringWidth() = %d, want %d", goRunewidthDefault, expected) } } // Test go-runewidth with StrictEmojiNeutral=false goRunewidthStrictFalse := (&runewidth.Condition{StrictEmojiNeutral: false}).StringWidth(tc.input) if expected, ok := tc.expected["go-runewidth_strict_false"]; ok { if goRunewidthStrictFalse != expected { 
t.Errorf("runewidth.Condition{StrictEmojiNeutral: false}.StringWidth() = %d, want %d", goRunewidthStrictFalse, expected) } } // Test go-runewidth with StrictEmojiNeutral=true goRunewidthStrictTrue := (&runewidth.Condition{StrictEmojiNeutral: true}).StringWidth(tc.input) if expected, ok := tc.expected["go-runewidth_strict_true"]; ok { if goRunewidthStrictTrue != expected { t.Errorf("runewidth.Condition{StrictEmojiNeutral: true}.StringWidth() = %d, want %d", goRunewidthStrictTrue, expected) } } // Test go-runewidth with EastAsianWidth=true goRunewidthEAW := (&runewidth.Condition{EastAsianWidth: true}).StringWidth(tc.input) if expected, ok := tc.expected["go-runewidth_EAW"]; ok { if goRunewidthEAW != expected { t.Errorf("runewidth.Condition{EastAsianWidth: true}.StringWidth() = %d, want %d", goRunewidthEAW, expected) } } // Test uniseg default unisegDefault := uniseg.StringWidth(tc.input) if expected, ok := tc.expected["uniseg_default"]; ok { if unisegDefault != expected { t.Errorf("uniseg.StringWidth() = %d, want %d", unisegDefault, expected) } } // Test uniseg with EastAsianWidth=true originalEAW := uniseg.EastAsianAmbiguousWidth uniseg.EastAsianAmbiguousWidth = 2 unisegEAW := uniseg.StringWidth(tc.input) uniseg.EastAsianAmbiguousWidth = originalEAW if expected, ok := tc.expected["uniseg_EAW"]; ok { if unisegEAW != expected { t.Errorf("uniseg.StringWidth() with EastAsianAmbiguousWidth=2 = %d, want %d", unisegEAW, expected) } } }) } } func TestFlagBehaviorDetailed(t *testing.T) { flags := []string{"🇺🇸", "🇯🇵", "🇬🇧", "🇫🇷", "🇩🇪"} t.Log("Flag behavior comparison:") t.Log("Library | displaywidth | go-runewidth (default) | go-runewidth (strict=false) | go-runewidth (strict=true) | uniseg") t.Log("--------|--------------|------------------------|----------------------------|----------------------------|-------") for _, flag := range flags { // displaywidth (always width 2, no StrictEmojiNeutral option) displaywidthDefault := displaywidth.String(flag) // go-runewidth 
goRunewidthDefault := runewidth.StringWidth(flag) goRunewidthStrictFalse := (&runewidth.Condition{StrictEmojiNeutral: false}).StringWidth(flag) goRunewidthStrictTrue := (&runewidth.Condition{StrictEmojiNeutral: true}).StringWidth(flag) // uniseg unisegDefault := uniseg.StringWidth(flag) t.Logf("%s | %d | %d | %d | %d | %d", flag, displaywidthDefault, goRunewidthDefault, goRunewidthStrictFalse, goRunewidthStrictTrue, unisegDefault) } } func TestTruncateComparison(t *testing.T) { testCases := []struct { name string input string maxWidth int tail string }{ { name: "ASCII truncation", input: "Hello World", maxWidth: 5, tail: "...", }, { name: "CJK truncation", input: "中文测试", maxWidth: 4, tail: "...", }, { name: "Emoji truncation", input: "😀🚀🎉", maxWidth: 4, tail: "...", }, { name: "Flags truncation", input: "🇺🇸🇯🇵🇬🇧", // known difference maxWidth: 4, tail: "...", }, { name: "Mixed content truncation", input: "Hello 世界! 😀🇺🇸", maxWidth: 10, tail: "...", }, { name: "No truncation needed", input: "Hi", maxWidth: 10, tail: "...", }, { name: "Empty tail", input: "Hello World", maxWidth: 5, tail: "", }, { name: "Width exactly equal to string width", input: "Hello", maxWidth: 5, tail: "...", }, { name: "Width exactly equal to string width with emoji", input: "😀🚀", maxWidth: 4, tail: "...", }, { name: "Width exactly equal to string width with CJK", input: "中文", maxWidth: 4, tail: "...", }, { name: "MaxWidth is 0", input: "Hello", maxWidth: 0, tail: "...", }, { name: "MaxWidth is 1", input: "Hello", maxWidth: 1, tail: "...", }, { name: "MaxWidth is 2", input: "Hello", maxWidth: 2, tail: "...", }, { name: "Empty string input", input: "", maxWidth: 5, tail: "...", }, { name: "Tail wider than maxWidth", input: "Hello", maxWidth: 2, tail: "中文", // width 4, wider than maxWidth }, { name: "Tail with emoji", input: "Hello", maxWidth: 5, tail: "😀", }, { name: "MaxWidth exactly equal to tail width", input: "Hello World", maxWidth: 3, // exactly width of "..." 
tail: "...", }, { name: "Input with control characters", input: "hello\nworld", maxWidth: 8, tail: "...", }, { name: "Single wide character truncation", input: "中", maxWidth: 1, tail: "...", }, { name: "Single emoji truncation", input: "😀", maxWidth: 1, tail: "...", }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { // displaywidth displaywidthResult := displaywidth.TruncateString(tc.input, tc.maxWidth, tc.tail) displaywidthWidth := displaywidth.String(displaywidthResult) // go-runewidth goRunewidthResult := runewidth.Truncate(tc.input, tc.maxWidth, tc.tail) goRunewidthWidth := runewidth.StringWidth(goRunewidthResult) if displaywidthWidth != goRunewidthWidth { t.Logf("displaywidth and go-runewidth results differ for %q: %d != %d", tc.input, displaywidthWidth, goRunewidthWidth) } if displaywidthResult != goRunewidthResult { t.Logf("displaywidth and go-runewidth results differ for %s : %s != %s", tc.input, displaywidthResult, goRunewidthResult) } }) } } golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/benchmark_test.go000066400000000000000000000305651515060771000276320ustar00rootroot00000000000000package comparison import ( "strings" "testing" "github.com/clipperhouse/displaywidth" "github.com/clipperhouse/displaywidth/testdata" "github.com/mattn/go-runewidth" "github.com/rivo/uniseg" ) // TestCase represents a test case from the test_cases.txt file type TestCase struct { Name string Input string } // loadTestCases reads and parses test cases from test_cases.txt func loadTestCases() ([]TestCase, int64, error) { file, err := testdata.TestCases() if err != nil { return nil, 0, err } var testCases []TestCase lines := strings.Split(string(file), "\n") for _, line := range lines { // Skip empty lines and comments if line == "" || strings.HasPrefix(line, "#") { continue } // Handle special cases with colons (like "newline:", "tab:", etc.) 
if strings.HasSuffix(line, ":") { name := strings.TrimSuffix(line, ":") var input string switch name { case "newline": input = "\n" case "tab": input = "\t" case "carriage return": input = "\r" case "backspace": input = "\b" case "null": input = "\x00" case "del": input = "\x7f" case "Zero Width Space": input = "\u200b" case "Zero Width Joiner": input = "\u200d" case "Zero Width Non-Joiner": input = "\u200c" case "Empty string": input = "" case "Single space": input = " " case "Multiple spaces": input = " " case "Tab and newline": input = "\t\n" case "Mixed whitespace": input = " \t \n " default: // For other cases, use the name as input input = name } testCases = append(testCases, TestCase{ Name: name, Input: input, }) } else { // Regular test case - use the line as both name and input testCases = append(testCases, TestCase{ Name: line, Input: line, }) } } totalBytes := 0 for _, tc := range testCases { totalBytes += len(tc.Input) } return testCases, int64(totalBytes), nil } var ( // Shared test data for benchmarks asciiTestStrings = []string{ "hello", "Hello World", "1234567890", "!@#$%^&*()", "This is a very long string with many characters to test performance of both implementations.", } emojiTestStrings = []string{ "😀 😠😂 🤣 😃 😄 😅 😆 😉 😊", "🚀 🎉 🎊 🎈 🎠🎂 🎃 🎄 🎆 🎇", "👨â€ðŸ‘©â€ðŸ‘§â€ðŸ‘¦ 👨â€ðŸ’» 👩â€ðŸ”¬ 👨â€ðŸŽ¨ 👩â€ðŸš€", "🇺🇸 🇬🇧 🇫🇷 🇩🇪 🇯🇵 🇰🇷 🇨🇳", "Hello 世界! 
😀", "👨â€ðŸ’» working on 🚀", "😀ðŸ˜ðŸ˜‚🤣😃😄😅😆😉😊😋😎ðŸ˜ðŸ˜˜ðŸ¥°ðŸ˜—😙😚☺ï¸ðŸ™‚🤗🤩🤔🤨ðŸ˜ðŸ˜‘😶🙄ðŸ˜ðŸ˜£ðŸ˜¥ðŸ˜®ðŸ¤ðŸ˜¯ðŸ˜ªðŸ˜«ðŸ¥±ðŸ˜´ðŸ˜ŒðŸ˜›ðŸ˜œðŸ˜ðŸ¤¤ðŸ˜’😓😔😕🙃🤑😲☹ï¸ðŸ™ðŸ˜–😞😟😤😢😭😦😧😨😩🤯😬😰😱🥵🥶😳🤪😵😡😠🤬😷🤒🤕🤢🤮🤧😇🤠🤡🥳🥴🥺🤥🤫🤭ðŸ§ðŸ¤“😈👿💀☠ï¸ðŸ‘¹ðŸ‘ºðŸ¤–👽👾💩😺😸😹😻😼😽🙀😿😾", } ) // BenchmarkString_Mixed benchmarks our displaywidth package func BenchmarkString_Mixed(b *testing.B) { b.Run("clipperhouse/displaywidth", func(b *testing.B) { testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { // Test with default settings (eastAsianWidth=false) _ = displaywidth.String(tc.Input) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { _ = runewidth.StringWidth(tc.Input) } } }) b.Run("rivo/uniseg", func(b *testing.B) { testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { _ = uniseg.StringWidth(tc.Input) } } }) } func BenchmarkString_EastAsian(b *testing.B) { options := displaywidth.Options{ EastAsianWidth: true, } condition := &runewidth.Condition{ EastAsianWidth: true, } // Save original value and restore after benchmark originalEAAWidth := uniseg.EastAsianAmbiguousWidth defer func() { uniseg.EastAsianAmbiguousWidth = originalEAAWidth }() b.Run("clipperhouse/displaywidth", func(b *testing.B) { testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { // Test with East Asian Width enabled _ = options.String(tc.Input) } } }) b.Run("mattn/go-runewidth", func(b 
*testing.B) { testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { _ = condition.StringWidth(tc.Input) } } }) b.Run("rivo/uniseg", func(b *testing.B) { // Set EastAsianAmbiguousWidth to 2 to match the other libraries uniseg.EastAsianAmbiguousWidth = 2 defer func() { uniseg.EastAsianAmbiguousWidth = 1 }() testCases, n, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, tc := range testCases { _ = uniseg.StringWidth(tc.Input) } } }) } // BenchmarkString_ASCII benchmarks ASCII-only strings func BenchmarkString_ASCII(b *testing.B) { n := 0 for _, s := range asciiTestStrings { n += len(s) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range asciiTestStrings { _ = displaywidth.String(s) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range asciiTestStrings { _ = runewidth.StringWidth(s) } } }) b.Run("rivo/uniseg", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range asciiTestStrings { _ = uniseg.StringWidth(s) } } }) } // BenchmarkString_Emoji benchmarks emoji strings func BenchmarkString_Emoji(b *testing.B) { n := 0 for _, s := range emojiTestStrings { n += len(s) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range emojiTestStrings { _ = displaywidth.String(s) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range emojiTestStrings { _ = 
runewidth.StringWidth(s) } } }) b.Run("rivo/uniseg", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range emojiTestStrings { _ = uniseg.StringWidth(s) } } }) } // BenchmarkRune_Mixed benchmarks rune width calculation using test cases func BenchmarkRune_Mixed(b *testing.B) { testCases, _, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } // Convert all strings to []rune var testRunes []rune n := 0 for _, tc := range testCases { runes := []rune(tc.Input) testRunes = append(testRunes, runes...) n += len(tc.Input) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range testRunes { _ = displaywidth.Rune(r) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range testRunes { _ = runewidth.RuneWidth(r) } } }) } // BenchmarkRune_EastAsian benchmarks rune width with East Asian Width option func BenchmarkRune_EastAsian(b *testing.B) { options := displaywidth.Options{ EastAsianWidth: true, } condition := &runewidth.Condition{ EastAsianWidth: true, } testCases, _, err := loadTestCases() if err != nil { b.Fatalf("Failed to load test cases: %v", err) } // Convert all strings to []rune var testRunes []rune n := 0 for _, tc := range testCases { runes := []rune(tc.Input) testRunes = append(testRunes, runes...) 
n += len(tc.Input) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range testRunes { _ = options.Rune(r) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range testRunes { _ = condition.RuneWidth(r) } } }) } // BenchmarkRune_ASCII benchmarks ASCII rune width calculation func BenchmarkRune_ASCII(b *testing.B) { // Convert ASCII strings to []rune var asciiRunes []rune n := 0 for _, s := range asciiTestStrings { runes := []rune(s) asciiRunes = append(asciiRunes, runes...) n += len(s) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range asciiRunes { _ = displaywidth.Rune(r) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range asciiRunes { _ = runewidth.RuneWidth(r) } } }) } // BenchmarkRune_Emoji benchmarks emoji rune width calculation func BenchmarkRune_Emoji(b *testing.B) { // Convert emoji strings to []rune var emojiRunes []rune n := 0 for _, s := range emojiTestStrings { runes := []rune(s) emojiRunes = append(emojiRunes, runes...) n += len(s) } b.Run("clipperhouse/displaywidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range emojiRunes { _ = displaywidth.Rune(r) } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(int64(n)) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, r := range emojiRunes { _ = runewidth.RuneWidth(r) } } }) } func BenchmarkTruncateWithTail(b *testing.B) { testStrings := []string{ "hello world", "This is a very long string that will definitely be truncated", "Hello 世界! 
😀", "👨â€ðŸ’» working on 🚀", "中文字符串测试", "😀ðŸ˜ðŸ˜‚🤣😃😄😅😆😉😊", } n := int64(0) for _, s := range testStrings { n += int64(len(s)) } maxWidths := []int{5, 10, 20, 30} tail := "..." b.Run("clipperhouse/displaywidth", func(b *testing.B) { options := displaywidth.Options{} b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range testStrings { for _, w := range maxWidths { _ = options.TruncateString(s, w, tail) } } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range testStrings { for _, w := range maxWidths { _ = runewidth.Truncate(s, w, tail) } } } }) } func BenchmarkTruncateWithoutTail(b *testing.B) { testStrings := []string{ "hello world", "This is a very long string that will definitely be truncated", "Hello 世界! 😀", "👨â€ðŸ’» working on 🚀", "a very long string that will definitely be truncated", "中文字符串测试", "😀ðŸ˜ðŸ˜‚🤣😃😄😅😆😉😊", } n := int64(0) for _, s := range testStrings { n += int64(len(s)) } maxWidths := []int{5, 10, 20, 30} b.Run("clipperhouse/displaywidth", func(b *testing.B) { options := displaywidth.Options{} b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range testStrings { for _, w := range maxWidths { _ = options.TruncateString(s, w, "") } } } }) b.Run("mattn/go-runewidth", func(b *testing.B) { b.SetBytes(n) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { for _, s := range testStrings { for _, w := range maxWidths { _ = runewidth.Truncate(s, w, "") } } } }) } golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/compatibility_test.go000066400000000000000000000023071515060771000305420ustar00rootroot00000000000000package comparison import ( "testing" "unicode" "github.com/clipperhouse/displaywidth" "github.com/mattn/go-runewidth" ) // Investigate differences between displaywidth and go-runewidth. 
// Not meant to be a real test, more of a tool, but good to // fail if something changes in the future. func TestAllRunes(t *testing.T) { t.Skip("skipping compatibility test") if unicode.Version < "15" { // We only care about Unicode 15 and above, // which I believe was Go version 1.21. return } for r := rune(0); r <= unicode.MaxRune; r++ { w1 := displaywidth.Rune(r) w2 := runewidth.RuneWidth(r) if w1 != w2 { if unicode.Is(unicode.Mn, r) { // these are in the trie, known // we will return width 0, // go-runewidth may return width 1 continue } if unicode.Is(unicode.Cf, r) { // these are in the trie, known // we will return width 0, // go-runewidth may return width 1 continue } if unicode.Is(unicode.Mc, r) { // these are deliberately excluded from the trie, known // we will return width 1, // go-runewidth may return width 0 continue } t.Errorf("%#x: runewidth is %d, displaywidth is %d, difference is %d", r, w2, w1, w2-w1) } } } golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/go.mod000066400000000000000000000004631515060771000254120ustar00rootroot00000000000000module github.com/clipperhouse/displaywidth/comparison go 1.18 require ( github.com/clipperhouse/displaywidth v0.10.0 github.com/mattn/go-runewidth v0.0.19 github.com/rivo/uniseg v0.4.7 ) require github.com/clipperhouse/uax29/v2 v2.7.0 // indirect replace github.com/clipperhouse/displaywidth => ../ golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/go.sum000066400000000000000000000010151515060771000254310ustar00rootroot00000000000000github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg 
v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= golang-github-clipperhouse-displaywidth-0.11.0+ds/comparison/strictemojineutral_test.go000066400000000000000000000067211515060771000316240ustar00rootroot00000000000000package comparison import ( "testing" "github.com/clipperhouse/displaywidth" "github.com/mattn/go-runewidth" ) // TestRegularEmojiAreAlwaysWidth2 explicitly verifies that regular emojis // are always width 2 func TestRegularEmojiAreAlwaysWidth2(t *testing.T) { // Regular emojis WITHOUT variation selectors (VS16) // go-runewidth handles VS16 differently, treating it as a separate character emojis := []string{ "😀", "ðŸ˜", "😂", "🤣", "😃", "😄", "😅", "😆", "🚀", "🎉", "🎊", "🎈", "ðŸŽ", "🎂", "ðŸ‘", "👎", "ðŸ‘", "ðŸ™", "🔥", "💯", "✨", "â­", } t.Log("Verifying that regular emojis are ALWAYS width 2:") t.Log("(Note: excludes emojis with VS16, which go-runewidth treats differently)") t.Log("") allPass := true for _, emoji := range emojis { // displaywidth (always width 2, no StrictEmojiNeutral option) dw := displaywidth.String(emoji) // go-runewidth (always width 2 regardless of StrictEmojiNeutral) gr1 := (&runewidth.Condition{StrictEmojiNeutral: true}).StringWidth(emoji) gr2 := (&runewidth.Condition{StrictEmojiNeutral: false}).StringWidth(emoji) if dw != 2 || gr1 != 2 || gr2 != 2 { t.Errorf("%s: Expected width 2 in all cases, got displaywidth=%d, go-runewidth(strict=true)=%d, go-runewidth(strict=false)=%d", emoji, dw, gr1, gr2) allPass = false } } if allPass { t.Log("✅ All regular emojis have width 2") } // Document the difference with VS16 t.Log("") t.Log("Variation Selector Difference:") emojiWithVS16 := []string{"â¤ï¸", "✂ï¸", "☺ï¸"} for _, emoji := range emojiWithVS16 { dw := displaywidth.String(emoji) gr := runewidth.StringWidth(emoji) t.Logf(" %s: displaywidth=%d, go-runewidth=%d (go-runewidth treats VS16 as separate char)", emoji, dw, gr) } } // TestFlagsBehavior documents flag behavior across libraries func TestFlagsBehavior(t *testing.T) { flags 
:= []string{ "🇺🇸", "🇯🇵", "🇬🇧", "🇫🇷", "🇩🇪", "🇨🇦", "🇦🇺", "🇧🇷", "🇮🇳", "🇨🇳", } t.Log("Flag behavior comparison:") t.Log("(displaywidth follows modern standards: flags are always width 2)") t.Log("") t.Log("Flag | displaywidth | go-runewidth (default) | go-runewidth (strict=false) | go-runewidth (strict=true)") t.Log("-----|-------------|------------------------|---------------------------|-------------------------") for _, flag := range flags { // displaywidth (always width 2, no StrictEmojiNeutral option) dw := displaywidth.String(flag) // go-runewidth (always width 1, regardless of StrictEmojiNeutral) grDefault := runewidth.StringWidth(flag) gr1 := (&runewidth.Condition{StrictEmojiNeutral: false}).StringWidth(flag) gr2 := (&runewidth.Condition{StrictEmojiNeutral: true}).StringWidth(flag) t.Logf("%s | %d | %d | %d | %d", flag, dw, grDefault, gr1, gr2) // Verify displaywidth behavior (always width 2) if dw != 2 { t.Errorf("%s: displaywidth should always be 2, got %d", flag, dw) } // Document go-runewidth behavior (always 1) if grDefault != 1 { t.Errorf("%s: go-runewidth default should be 1, got %d", flag, grDefault) } if gr1 != 1 { t.Errorf("%s: go-runewidth with strict=false should be 1, got %d", flag, gr1) } if gr2 != 1 { t.Errorf("%s: go-runewidth with strict=true should be 1, got %d", flag, gr2) } } t.Log("") t.Log("Summary:") t.Log("- displaywidth: flags are always width 2 (modern standard)") t.Log("- go-runewidth: flags are always width 1, regardless of StrictEmojiNeutral") } golang-github-clipperhouse-displaywidth-0.11.0+ds/fuzz_test.go000066400000000000000000000322231515060771000245150ustar00rootroot00000000000000package displaywidth import ( "bytes" "strings" "testing" "unicode/utf8" "github.com/clipperhouse/displaywidth/testdata" ) // FuzzBytesAndString fuzzes the Bytes function with valid and invalid UTF-8. 
func FuzzBytesAndString(f *testing.F) { if testing.Short() { f.Skip("skipping fuzz test in short mode") } // Seed with multi-lingual text (paragraph-sized chunks) file, err := testdata.Sample() if err != nil { f.Fatal(err) } chunks := bytes.Split(file, []byte("\n")) for _, chunk := range chunks { f.Add(chunk) } // Seed with invalid UTF-8 invalid, err := testdata.InvalidUTF8() if err != nil { f.Fatal(err) } chunks = bytes.Split(invalid, []byte("\n")) for _, chunk := range chunks { f.Add(chunk) } // Seed with test cases testCases, err := testdata.TestCases() if err != nil { f.Fatal(err) } chunks = bytes.Split(testCases, []byte("\n")) for _, chunk := range chunks { f.Add(chunk) } // Seed with random bytes for i := 0; i < 10; i++ { b, err := testdata.RandomBytes() if err != nil { f.Fatal(err) } f.Add(b) } // Seed with edge cases f.Add([]byte("")) // empty f.Add([]byte("a")) // single ASCII f.Add([]byte("\x00")) // null byte f.Add([]byte("\t\n\r")) // whitespace f.Add([]byte("ðŸŒ")) // emoji f.Add([]byte("\u0301")) // combining mark f.Add([]byte{0xff, 0xfe, 0xfd}) // invalid UTF-8 f.Fuzz(func(t *testing.T, text []byte) { // Test with default options wb := Bytes(text) // Invariant: width should never be negative if wb < 0 { t.Errorf("Bytes() returned negative width for %q: %d", text, wb) } // Invariant: empty input should always return 0 if len(text) == 0 && wb != 0 { t.Errorf("Bytes() returned non-zero width %d for empty input", wb) } // Invariant: for valid UTF-8, width should never exceed input length // (each byte is at most 1 column wide, some are 0, some multi-byte chars are 2) if utf8.Valid(text) { runeCount := utf8.RuneCount(text) if wb > len(text) { t.Errorf("Bytes() width %d exceeds byte length %d for valid UTF-8: %q", wb, len(text), text) } // Also shouldn't exceed rune count * 2 (max width per rune is 2) if wb > runeCount*2 { t.Errorf("Bytes() width %d exceeds rune count * 2 (%d) for %q", wb, runeCount*2, text) } // Consistency check: String() and Bytes() 
should agree on valid UTF-8 ws := String(string(text)) if wb != ws { t.Errorf("Bytes() returned %d but String() returned %d for %q", wb, ws, text) } } // Test with different options combinations options := []Options{ {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, {ControlSequences8Bit: true}, {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { wb := option.Bytes(text) // Same invariants apply if wb < 0 { t.Errorf("Bytes() with options %+v returned negative width for %q: %d", option, text, wb) } if len(text) == 0 && wb != 0 { t.Errorf("Bytes() with options %+v returned non-zero width %d for empty input", option, wb) } ws := option.String(string(text)) if wb != ws { t.Errorf("Bytes() returned %d but String() returned %d with options %+v for %q", wb, ws, option, text) } } }) } // FuzzRune fuzzes the Rune function. func FuzzRune(f *testing.F) { if testing.Short() { f.Skip("skipping fuzz test in short mode") } // Seed with interesting runes seeds := []rune{ 0, // null ' ', // space 'A', // ASCII '\t', // tab '\n', // newline '\u0000', // null '\u0301', // combining acute accent '\u00A0', // non-breaking space '\u2028', // line separator '\u2029', // paragraph separator '\uFEFF', // zero-width no-break space '\uFFFD', // replacement character '\uFFFE', // noncharacter '\uFFFF', // noncharacter '世', // CJK '界', // CJK 'ðŸŒ', // emoji '👨', // emoji 0xD800, // surrogate (invalid) 0xDFFF, // surrogate (invalid) 0x10FFFF, // max valid rune } for _, r := range seeds { f.Add(r) } f.Fuzz(func(t *testing.T, r rune) { // Test with default options wr := Rune(r) // Invariant: width should never be negative if wr < 0 { t.Errorf("Rune() returned negative width for %U (%c): %d", r, r, wr) } // Invariant: width should be 0, 1, or 2 if wr > 2 { t.Errorf("Rune() returned invalid width for %U (%c): %d (expected 0, 1, or 2)", r, 
r, wr) } // Consistency check: compare with Bytes/String for valid runes if utf8.ValidRune(r) { var buf [4]byte n := utf8.EncodeRune(buf[:], r) wb := Bytes(buf[:n]) if wr != wb { t.Errorf("Rune() returned %d but Bytes() returned %d for %U (%c)", wr, wb, r, r) } ws := String(string(r)) if wr != ws { t.Errorf("Rune() returned %d but String() returned %d for %U (%c)", wr, ws, r, r) } } // Test with different options (Rune is per-rune, ControlSequences // doesn't affect single runes, but we include it for completeness) options := []Options{ {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, {EastAsianWidth: true, ControlSequences: true}, } for _, option := range options { wr := option.Rune(r) // Same invariants apply if wr < 0 || wr > 2 { t.Errorf("Rune() with options %+v returned invalid width for %U (%c): %d", option, r, r, wr) } // Consistency check with Bytes/String for valid runes if utf8.ValidRune(r) { var buf [4]byte n := utf8.EncodeRune(buf[:], r) wb := option.Bytes(buf[:n]) if wr != wb { t.Errorf("Rune() returned %d but Bytes() returned %d with options %+v for %U (%c)", wr, wb, option, r, r) } ws := option.String(string(r)) if wr != ws { t.Errorf("Rune() returned %d but String() returned %d with options %+v for %U (%c)", wr, ws, option, r, r) } } } }) } func FuzzTruncateStringAndBytes(f *testing.F) { if testing.Short() { f.Skip("skipping fuzz test in short mode") } // Seed with multi-lingual text (paragraph-sized chunks) file, err := testdata.Sample() if err != nil { f.Fatal(err) } fs := string(file) chunks := strings.Split(fs, "\n") for _, chunk := range chunks { f.Add(chunk) } // Seed with invalid UTF-8 invalid, err := testdata.InvalidUTF8() if err != nil { f.Fatal(err) } fs = string(invalid) chunks = strings.Split(fs, "\n") for _, chunk := range chunks { f.Add(chunk) } // Seed with test cases testCases, err := testdata.TestCases() if err != nil { f.Fatal(err) } fs = string(testCases) chunks = strings.Split(fs, "\n") for _, chunk := 
range chunks { f.Add(chunk) } // Seed with random bytes for i := 0; i < 10; i++ { b, err := testdata.RandomBytes() if err != nil { f.Fatal(err) } f.Add(string(b)) } // Seed with edge cases f.Add("") // empty f.Add("a") // single ASCII f.Add("\t\n\r") // whitespace f.Add("ðŸŒ") // emoji f.Add("\u0301") // combining mark f.Add("\xff\xfe\xfd") // invalid UTF-8 f.Fuzz(func(t *testing.T, text string) { // Exercise truncation to discover panics and infinite loops. // Width invariant testing is in proper unit tests. options := []Options{ {}, {EastAsianWidth: true}, {ControlSequences: true}, {ControlSequences8Bit: true}, {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, option := range options { ts := option.TruncateString(text, 10, "...") tb := option.TruncateBytes([]byte(text), 10, []byte("...")) // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", option, text, tb, ts) } } }) } // FuzzControlSequences fuzzes strings containing ANSI/ECMA-48 escape sequences // across all option combinations (EastAsianWidth x ControlSequences). 
func FuzzControlSequences(f *testing.F) { if testing.Short() { f.Skip("skipping fuzz test in short mode") } // Seed with ANSI escape sequences f.Add([]byte("\x1b[31m")) // SGR red f.Add([]byte("\x1b[0m")) // SGR reset f.Add([]byte("\x1b[1m")) // SGR bold f.Add([]byte("\x1b[38;5;196m")) // SGR 256-color f.Add([]byte("\x1b[38;2;255;0;0m")) // SGR truecolor f.Add([]byte("\x1b[A")) // cursor up f.Add([]byte("\x1b[10;20H")) // cursor position f.Add([]byte("\x1b[2J")) // erase in display f.Add([]byte("\x1b[31mhello\x1b[0m")) // red text f.Add([]byte("\x1b[1m\x1b[31mhi\x1b[0m")) // nested SGR f.Add([]byte("hello\x1b[31mworld\x1b[0m")) // ANSI mid-string f.Add([]byte("\x1b[31m中文\x1b[0m")) // colored CJK f.Add([]byte("\x1b[31m😀\x1b[0m")) // colored emoji f.Add([]byte("\x1b[31m🇺🇸\x1b[0m")) // colored flag f.Add([]byte("a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m")) // multiple colors f.Add([]byte("\x1b[31m\x1b[42m\x1b[1mbold on red\x1b[0m")) // stacked SGR f.Add([]byte("\r\n")) // CR+LF f.Add([]byte("hello\r\nworld")) // text with CRLF f.Add([]byte("\x1b")) // bare ESC f.Add([]byte("\x1b[")) // incomplete sequence f.Add([]byte("\x1b[31")) // incomplete SGR f.Add([]byte("")) // empty f.Add([]byte("hello")) // plain ASCII f.Add([]byte("中文")) // plain CJK f.Add([]byte("😀")) // plain emoji // Seed with 8-bit C1 escape sequences f.Add([]byte("\x9B31m")) // C1 CSI red f.Add([]byte("\x9B0m")) // C1 CSI reset f.Add([]byte("\x9B1m")) // C1 CSI bold f.Add([]byte("\x9B31mhello\x9B0m")) // C1 CSI red text f.Add([]byte("\x9B1m\x9B31mhi\x9B0m")) // C1 nested SGR f.Add([]byte("hello\x9B31mworld\x9B0m")) // C1 mid-string f.Add([]byte("\x9B31m中文\x9B0m")) // C1 colored CJK f.Add([]byte("\x9B31m😀\x9B0m")) // C1 colored emoji f.Add([]byte("\x9D0;Title\x9C")) // C1 OSC with C1 ST f.Add([]byte("\x9D0;Title\x07")) // C1 OSC with BEL f.Add([]byte("\x90qpayload\x9C")) // C1 DCS with C1 ST f.Add([]byte("\x84")) // standalone C1 f.Add([]byte("\x1b[31mhello\x9B0m")) // mixed 7-bit and 8-bit // Seed with 
multi-lingual text file, err := testdata.Sample() if err != nil { f.Fatal(err) } chunks := bytes.Split(file, []byte("\n")) for _, chunk := range chunks { f.Add(chunk) } options := []Options{ {}, {EastAsianWidth: true}, {ControlSequences: true}, {ControlSequences8Bit: true}, {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true}, {EastAsianWidth: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences: true, ControlSequences8Bit: true}, } f.Fuzz(func(t *testing.T, text []byte) { for _, opt := range options { wb := opt.Bytes(text) ws := opt.String(string(text)) // Invariant: width is never negative if wb < 0 { t.Errorf("Bytes() with %+v returned negative width %d for %q", opt, wb, text) } // Invariant: String and Bytes agree if wb != ws { t.Errorf("Bytes()=%d != String()=%d with %+v for %q", wb, ws, opt, text) } // Invariant: empty input is always 0 if len(text) == 0 && wb != 0 { t.Errorf("non-zero width %d for empty input with %+v", wb, opt) } // Invariant: sum of grapheme widths equals total width bg := opt.BytesGraphemes(text) bgSum := 0 for bg.Next() { gw := bg.Width() if gw < 0 { t.Errorf("grapheme Width() < 0 with %+v for %q", opt, text) } bgSum += gw } if bgSum != wb { t.Errorf("sum of grapheme widths %d != Bytes() %d with %+v for %q", bgSum, wb, opt, text) } // Same for StringGraphemes sg := opt.StringGraphemes(string(text)) sgSum := 0 for sg.Next() { gw := sg.Width() if gw < 0 { t.Errorf("grapheme Width() < 0 with %+v for %q", opt, text) } sgSum += gw } if sgSum != ws { t.Errorf("sum of StringGraphemes widths %d != String() %d with %+v for %q", sgSum, ws, opt, text) } // Exercise truncation to discover panics and infinite loops. // Width invariant testing is in proper unit tests. tail := "..." 
for _, maxWidth := range []int{0, 1, 3, 5, 10, 20} { ts := opt.TruncateString(string(text), maxWidth, tail) tb := opt.TruncateBytes(text, maxWidth, []byte(tail)) // Invariant: String and Bytes paths must agree if !bytes.Equal(tb, []byte(ts)) { t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", opt, text, tb, ts) } } } }) } golang-github-clipperhouse-displaywidth-0.11.0+ds/gen.go000066400000000000000000000000751515060771000232310ustar00rootroot00000000000000package displaywidth //go:generate go run -C internal/gen . golang-github-clipperhouse-displaywidth-0.11.0+ds/go.mod000066400000000000000000000001461515060771000232360ustar00rootroot00000000000000module github.com/clipperhouse/displaywidth go 1.18 require github.com/clipperhouse/uax29/v2 v2.7.0 golang-github-clipperhouse-displaywidth-0.11.0+ds/go.sum000066400000000000000000000002671515060771000232670ustar00rootroot00000000000000github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= golang-github-clipperhouse-displaywidth-0.11.0+ds/graphemes.go000066400000000000000000000043111515060771000244300ustar00rootroot00000000000000package displaywidth import ( "github.com/clipperhouse/uax29/v2/graphemes" ) // Graphemes is an iterator over grapheme clusters. // // Iterate using the Next method, and get the width of the current grapheme // using the Width method. type Graphemes[T ~string | []byte] struct { iter *graphemes.Iterator[T] options Options } // Next advances the iterator to the next grapheme cluster. func (g *Graphemes[T]) Next() bool { return g.iter.Next() } // Value returns the current grapheme cluster. func (g *Graphemes[T]) Value() T { return g.iter.Value() } // Width returns the display width of the current grapheme cluster. 
func (g *Graphemes[T]) Width() int { return graphemeWidth(g.Value(), g.options) } // StringGraphemes returns an iterator over grapheme clusters for the given // string. // // Iterate using the Next method, and get the width of the current grapheme // using the Width method. func StringGraphemes(s string) Graphemes[string] { return DefaultOptions.StringGraphemes(s) } // StringGraphemes returns an iterator over grapheme clusters for the given // string, with the given options. // // Iterate using the Next method, and get the width of the current grapheme // using the Width method. func (options Options) StringGraphemes(s string) Graphemes[string] { g := graphemes.FromString(s) g.AnsiEscapeSequences = options.ControlSequences g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[string]{iter: g, options: options} } // BytesGraphemes returns an iterator over grapheme clusters for the given // []byte. // // Iterate using the Next method, and get the width of the current grapheme // using the Width method. func BytesGraphemes(s []byte) Graphemes[[]byte] { return DefaultOptions.BytesGraphemes(s) } // BytesGraphemes returns an iterator over grapheme clusters for the given // []byte, with the given options. // // Iterate using the Next method, and get the width of the current grapheme // using the Width method. 
func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] { g := graphemes.FromBytes(s) g.AnsiEscapeSequences = options.ControlSequences g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit return Graphemes[[]byte]{iter: g, options: options} } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/000077500000000000000000000000001515060771000237435ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/000077500000000000000000000000001515060771000245145ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/go.mod000066400000000000000000000001021515060771000256130ustar00rootroot00000000000000module github.com/clipperhouse/displaywidth/internal/gen go 1.18 golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/go.sum000066400000000000000000000000001515060771000256350ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/main.go000066400000000000000000000012741515060771000257730ustar00rootroot00000000000000// Package main generates tries of Unicode properties for string width calculation package main import ( "fmt" "log" "path/filepath" ) func main() { fmt.Println("Generating string width trie...") // Parse Unicode data data, err := ParseUnicodeData() if err != nil { log.Fatalf("Failed to parse Unicode data: %v", err) } // Generate trie trie, err := GenerateTrie(data) if err != nil { log.Fatalf("Failed to generate trie: %v", err) } // Write trie to output file outputPath := filepath.Join("..", "..", "trie.go") if err := WriteTrieGo(trie, outputPath); err != nil { log.Fatalf("Failed to write trie: %v", err) } fmt.Println("Trie generation completed successfully!") } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/trie.go000066400000000000000000000061601515060771000260110ustar00rootroot00000000000000// Package trie handles the generation of compressed tries for character properties package main import ( "bytes" "fmt" 
"go/format" "io" "os" "unicode" "github.com/clipperhouse/displaywidth/internal/gen/triegen" ) // GenerateTrie creates a compressed trie from Unicode data using triegen func GenerateTrie(data *UnicodeData) (*triegen.Trie, error) { trie := triegen.NewTrie("stringWidth") // Insert all characters with non-default properties inserted := 0 for r := rune(0); r <= unicode.MaxRune; r++ { // Skip surrogate characters (U+D800-U+DFFF) and other invalid characters if r >= 0xD800 && r <= 0xDFFF { continue } // Skip characters that would create invalid UTF-8 if r == unicode.ReplacementChar { continue } props := buildPropertyBitmap(r, data) // Only insert characters with non-default properties if props != 0 { trie.Insert(r, uint64(props)) inserted++ } } fmt.Printf("Inserted %d characters with non-default properties\n", inserted) return trie, nil } // WriteTrieGo generates the Go code for the trie using triegen func WriteTrieGo(trie *triegen.Trie, outputPath string) error { buf := &bytes.Buffer{} // Write package header fmt.Fprintf(buf, "// Code generated by internal/gen/main.go. 
DO NOT EDIT.\n\n") fmt.Fprintf(buf, "package displaywidth\n\n") // Write property definitions writeProperties(buf) // Generate the trie using triegen (it will use uint8/uint16/etc directly) size, err := trie.Gen(buf) if err != nil { return fmt.Errorf("failed to generate trie: %v", err) } b := buf.Bytes() typename := "stringWidthTrie" typeDefSig := `type ` + typename + ` struct` noTypeDef := `// ` + typeDefSig b = bytes.ReplaceAll(b, []byte(typeDefSig), []byte(noTypeDef)) lookupSig := `(t *` + typename + `) lookup(s []byte)` genericLookupSig := `lookup[T ~string | []byte](s T)` b = bytes.ReplaceAll(b, []byte(lookupSig), []byte(genericLookupSig)) lookupValueSig := `(t *` + typename + `) lookupValue` genericLookupValueSig := `lookupValue` b = bytes.ReplaceAll(b, []byte(lookupValueSig), []byte(genericLookupValueSig)) lookupCallSig := `t.lookupValue(` genericLookupCallSig := `lookupValue(` b = bytes.ReplaceAll(b, []byte(lookupCallSig), []byte(genericLookupCallSig)) formatted, err := format.Source(b) if err != nil { return err } dst, err := os.Create(outputPath) if err != nil { return err } defer dst.Close() _, err = dst.Write(formatted) if err != nil { return err } fmt.Printf("Generated trie with size %d bytes\n", size) return nil } // writeProperties writes the character properties definitions to the buffer. // It uses PropertyDefinitions from unicode.go as the single source of truth. 
func writeProperties(w io.Writer) { fmt.Fprintf(w, "// property is an enum representing the properties of a character\n") fmt.Fprintf(w, "type property uint8\n\n") fmt.Fprintf(w, "const (\n") for i, prop := range PropertyDefinitions { fmt.Fprintf(w, "// %s\n", prop.Comment) constName := "_" + prop.Name if i == 0 { fmt.Fprintf(w, "%s property = iota + 1\n", constName) } else { fmt.Fprintf(w, "%s\n", constName) } } fmt.Fprintf(w, ")\n\n") } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/000077500000000000000000000000001515060771000261515ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/LICENSE000066400000000000000000000026551515060771000271660ustar00rootroot00000000000000Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/compact.go000066400000000000000000000037661515060771000301420ustar00rootroot00000000000000// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package triegen // This file defines Compacter and its implementations. import "io" // A Compacter generates an alternative, more space-efficient way to store a // trie value block. A trie value block holds all possible values for the last // byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block // always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). type Compacter interface { // Size returns whether the Compacter could encode the given block as well // as its size in case it can. len(v) is always 64. Size(v []uint64) (sz int, ok bool) // Store stores the block using the Compacter's compression method. // It returns a handle with which the block can be retrieved. // len(v) is always 64. Store(v []uint64) uint32 // Print writes the data structures associated to the given store to w. Print(w io.Writer) error // Handler returns the name of a function that gets called during trie // lookup for blocks generated by the Compacter. 
The function should be of // the form func (n uint32, b byte) uint64, where n is the index returned by // the Compacter's Store method and b is the last byte of the UTF-8 // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the // block. Handler() string } // simpleCompacter is the default Compacter used by builder. It implements a // normal trie block. type simpleCompacter builder func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { return blockSize * b.ValueSize, true } func (b *simpleCompacter) Store(v []uint64) uint32 { h := uint32(len(b.ValueBlocks) - blockOffset) b.ValueBlocks = append(b.ValueBlocks, v) return h } func (b *simpleCompacter) Print(io.Writer) error { // Structures are printed in print.go. return nil } func (b *simpleCompacter) Handler() string { panic("Handler should be special-cased for this Compacter") } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/data_test.go000066400000000000000000000543011515060771000304530ustar00rootroot00000000000000// This file is generated with "go test -tags generate". DO NOT EDIT! // +build !generate package triegen_test // lookup returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *randTrie) lookup(s []byte) (v uint8, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return randValues[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. 
} o := uint32(i)<<6 + uint32(c1) i = randIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = randIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = randIndex[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. // s must start with a full and valid UTF-8 encoded rune. func (t *randTrie) lookupUnsafe(s []byte) uint8 { c0 := s[0] if c0 < 0x80 { // is ASCII return randValues[c0] } i := randIndex[c0] if c0 < 0xE0 { // 2-byte UTF-8 return t.lookupValue(uint32(i), s[1]) } i = randIndex[uint32(i)<<6+uint32(s[1])] if c0 < 0xF0 { // 3-byte UTF-8 return t.lookupValue(uint32(i), s[2]) } i = randIndex[uint32(i)<<6+uint32(s[2])] if c0 < 0xF8 { // 4-byte UTF-8 return t.lookupValue(uint32(i), s[3]) } return 0 } // lookupString returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *randTrie) lookupString(s string) (v uint8, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return randValues[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. 
} return t.lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = randIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := randIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = randIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = randIndex[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. // s must start with a full and valid UTF-8 encoded rune. func (t *randTrie) lookupStringUnsafe(s string) uint8 { c0 := s[0] if c0 < 0x80 { // is ASCII return randValues[c0] } i := randIndex[c0] if c0 < 0xE0 { // 2-byte UTF-8 return t.lookupValue(uint32(i), s[1]) } i = randIndex[uint32(i)<<6+uint32(s[1])] if c0 < 0xF0 { // 3-byte UTF-8 return t.lookupValue(uint32(i), s[2]) } i = randIndex[uint32(i)<<6+uint32(s[2])] if c0 < 0xF8 { // 4-byte UTF-8 return t.lookupValue(uint32(i), s[3]) } return 0 } // randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f. type randTrie struct{} func newRandTrie(i int) *randTrie { return &randTrie{} } // lookupValue determines the type of block n and looks up the value for b. func (t *randTrie) lookupValue(n uint32, b byte) uint8 { switch { default: return uint8(randValues[n<<6+uint32(b)]) } } // randValues: 56 blocks, 3584 entries, 3584 bytes // The third block is the zero block. 
var randValues = [3584]uint8{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xc9: 0x0001, // Block 0x4, offset 0x100 0x100: 0x0001, // Block 0x5, offset 0x140 0x155: 0x0001, // Block 0x6, offset 0x180 0x196: 0x0001, // Block 0x7, offset 0x1c0 0x1ef: 0x0001, // Block 0x8, offset 0x200 0x206: 0x0001, // Block 0x9, offset 0x240 0x258: 0x0001, // Block 0xa, offset 0x280 0x288: 0x0001, // Block 0xb, offset 0x2c0 0x2f2: 0x0001, // Block 0xc, offset 0x300 0x304: 0x0001, // Block 0xd, offset 0x340 0x34b: 0x0001, // Block 0xe, offset 0x380 0x3ba: 0x0001, // Block 0xf, offset 0x3c0 0x3f5: 0x0001, // Block 0x10, offset 0x400 0x41d: 0x0001, // Block 0x11, offset 0x440 0x442: 0x0001, // Block 0x12, offset 0x480 0x4bb: 0x0001, // Block 0x13, offset 0x4c0 0x4e9: 0x0001, // Block 0x14, offset 0x500 0x53e: 0x0001, // Block 0x15, offset 0x540 0x55f: 0x0001, // Block 0x16, offset 0x580 0x5b7: 0x0001, // Block 0x17, offset 0x5c0 0x5d9: 0x0001, // Block 0x18, offset 0x600 0x60e: 0x0001, // Block 0x19, offset 0x640 0x652: 0x0001, // Block 0x1a, offset 0x680 0x68f: 0x0001, // Block 0x1b, offset 0x6c0 0x6dc: 0x0001, // Block 0x1c, offset 0x700 0x703: 0x0001, // Block 0x1d, offset 0x740 0x741: 0x0001, // Block 0x1e, offset 0x780 0x79b: 0x0001, // Block 0x1f, offset 0x7c0 0x7f1: 0x0001, // Block 0x20, offset 0x800 0x833: 0x0001, // Block 0x21, offset 0x840 0x853: 0x0001, // Block 0x22, offset 0x880 0x8a2: 0x0001, // Block 0x23, offset 0x8c0 0x8f8: 0x0001, // Block 0x24, offset 0x900 0x917: 0x0001, // Block 0x25, offset 0x940 0x945: 0x0001, // Block 0x26, offset 0x980 0x99e: 0x0001, // Block 0x27, offset 0x9c0 0x9fd: 0x0001, // Block 0x28, offset 0xa00 0xa0d: 0x0001, // Block 0x29, offset 0xa40 0xa66: 0x0001, // Block 0x2a, offset 0xa80 0xaab: 0x0001, // Block 0x2b, offset 0xac0 0xaea: 0x0001, // Block 0x2c, offset 0xb00 0xb2d: 0x0001, // Block 0x2d, offset 0xb40 0xb54: 0x0001, // Block 0x2e, offset 0xb80 0xb90: 0x0001, // Block 0x2f, 
offset 0xbc0 0xbe5: 0x0001, // Block 0x30, offset 0xc00 0xc28: 0x0001, // Block 0x31, offset 0xc40 0xc7c: 0x0001, // Block 0x32, offset 0xc80 0xcbf: 0x0001, // Block 0x33, offset 0xcc0 0xcc7: 0x0001, // Block 0x34, offset 0xd00 0xd34: 0x0001, // Block 0x35, offset 0xd40 0xd61: 0x0001, // Block 0x36, offset 0xd80 0xdb9: 0x0001, // Block 0x37, offset 0xdc0 0xdda: 0x0001, } // randIndex: 89 blocks, 5696 entries, 5696 bytes // Block 0 is the zero block. var randIndex = [5696]uint8{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04, 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07, 0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56, // Block 0x4, offset 0x100 0x107: 0x01, // Block 0x5, offset 0x140 0x16c: 0x02, // Block 0x6, offset 0x180 0x19c: 0x03, 0x1ae: 0x04, // Block 0x7, offset 0x1c0 0x1d8: 0x05, 0x1f7: 0x06, // Block 0x8, offset 0x200 0x20c: 0x07, // Block 0x9, offset 0x240 0x24a: 0x08, // Block 0xa, offset 0x280 0x2b6: 0x09, // Block 0xb, offset 0x2c0 0x2d5: 0x0a, // Block 0xc, offset 0x300 0x31a: 0x0b, // Block 0xd, offset 0x340 0x373: 0x0c, // Block 0xe, offset 0x380 0x38b: 0x0d, // Block 0xf, offset 0x3c0 0x3f0: 0x0e, // Block 0x10, offset 0x400 0x433: 0x0f, // Block 0x11, offset 0x440 0x45d: 0x10, // Block 0x12, offset 0x480 0x491: 0x08, 0x494: 0x09, 0x497: 0x0a, 0x49b: 0x0b, 0x49c: 0x0c, 0x4a1: 0x0d, 0x4ad: 0x0e, 0x4ba: 0x0f, // Block 0x13, offset 0x4c0 0x4c1: 0x11, // Block 0x14, offset 0x500 0x531: 0x12, // Block 0x15, offset 0x540 0x546: 0x13, // Block 0x16, offset 0x580 0x5ab: 0x14, // Block 0x17, offset 0x5c0 0x5d4: 0x11, 0x5fe: 0x11, // Block 0x18, offset 0x600 0x618: 0x0a, // Block 0x19, offset 0x640 0x65b: 0x15, // Block 0x1a, offset 0x680 0x6a0: 0x16, // Block 0x1b, offset 0x6c0 0x6d2: 0x17, 0x6f6: 0x18, // Block 0x1c, offset 0x700 0x711: 0x19, // Block 0x1d, offset 0x740 0x768: 0x1a, // Block 0x1e, offset 0x780 0x783: 0x1b, // Block 0x1f, offset 0x7c0 0x7f9: 0x1c, // 
Block 0x20, offset 0x800 0x831: 0x1d, // Block 0x21, offset 0x840 0x85e: 0x1e, // Block 0x22, offset 0x880 0x898: 0x1f, // Block 0x23, offset 0x8c0 0x8c7: 0x18, 0x8d5: 0x14, 0x8f7: 0x20, 0x8fe: 0x1f, // Block 0x24, offset 0x900 0x905: 0x21, // Block 0x25, offset 0x940 0x966: 0x03, // Block 0x26, offset 0x980 0x981: 0x07, 0x983: 0x11, 0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15, 0x992: 0x16, 0x995: 0x17, 0x996: 0x18, 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c, 0x9a3: 0x1d, 0x9ad: 0x1e, 0x9af: 0x1f, 0x9b0: 0x20, 0x9b1: 0x21, 0x9b8: 0x22, 0x9bd: 0x23, // Block 0x27, offset 0x9c0 0x9cd: 0x22, // Block 0x28, offset 0xa00 0xa0c: 0x08, // Block 0x29, offset 0xa40 0xa6f: 0x1c, // Block 0x2a, offset 0xa80 0xa90: 0x1a, 0xaaf: 0x23, // Block 0x2b, offset 0xac0 0xae3: 0x19, 0xae8: 0x24, 0xafc: 0x25, // Block 0x2c, offset 0xb00 0xb13: 0x26, // Block 0x2d, offset 0xb40 0xb67: 0x1c, // Block 0x2e, offset 0xb80 0xb8f: 0x0b, // Block 0x2f, offset 0xbc0 0xbcb: 0x27, 0xbe7: 0x26, // Block 0x30, offset 0xc00 0xc34: 0x16, // Block 0x31, offset 0xc40 0xc62: 0x03, // Block 0x32, offset 0xc80 0xcbb: 0x12, // Block 0x33, offset 0xcc0 0xcdf: 0x09, // Block 0x34, offset 0xd00 0xd34: 0x0a, // Block 0x35, offset 0xd40 0xd41: 0x1e, // Block 0x36, offset 0xd80 0xd83: 0x28, // Block 0x37, offset 0xdc0 0xdc0: 0x15, // Block 0x38, offset 0xe00 0xe1a: 0x15, // Block 0x39, offset 0xe40 0xe65: 0x29, // Block 0x3a, offset 0xe80 0xe86: 0x1f, // Block 0x3b, offset 0xec0 0xeec: 0x18, // Block 0x3c, offset 0xf00 0xf28: 0x2a, // Block 0x3d, offset 0xf40 0xf53: 0x08, // Block 0x3e, offset 0xf80 0xfa2: 0x2b, 0xfaa: 0x17, // Block 0x3f, offset 0xfc0 0xfc0: 0x25, 0xfc2: 0x26, 0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29, 0xfd5: 0x2a, 0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d, 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31, 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35, 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39, 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c, // Block 0x40, offset 0x1000 0x102c: 
0x2c, // Block 0x41, offset 0x1040 0x1074: 0x2c, // Block 0x42, offset 0x1080 0x108c: 0x08, 0x10a0: 0x2d, // Block 0x43, offset 0x10c0 0x10e8: 0x10, // Block 0x44, offset 0x1100 0x110f: 0x13, // Block 0x45, offset 0x1140 0x114b: 0x2e, // Block 0x46, offset 0x1180 0x118b: 0x23, 0x119d: 0x0c, // Block 0x47, offset 0x11c0 0x11c3: 0x12, 0x11f9: 0x0f, // Block 0x48, offset 0x1200 0x121e: 0x1b, // Block 0x49, offset 0x1240 0x1270: 0x2f, // Block 0x4a, offset 0x1280 0x128a: 0x1b, 0x12a7: 0x02, // Block 0x4b, offset 0x12c0 0x12fb: 0x14, // Block 0x4c, offset 0x1300 0x1333: 0x30, // Block 0x4d, offset 0x1340 0x134d: 0x31, // Block 0x4e, offset 0x1380 0x138e: 0x15, // Block 0x4f, offset 0x13c0 0x13f4: 0x32, // Block 0x50, offset 0x1400 0x141b: 0x33, // Block 0x51, offset 0x1440 0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41, 0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45, 0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a, 0x1472: 0x4b, 0x1473: 0x4c, 0x1479: 0x4d, 0x147b: 0x4e, // Block 0x52, offset 0x1480 0x1480: 0x34, 0x1499: 0x11, 0x14b6: 0x2c, // Block 0x53, offset 0x14c0 0x14e4: 0x0d, // Block 0x54, offset 0x1500 0x1527: 0x08, // Block 0x55, offset 0x1540 0x1555: 0x2b, // Block 0x56, offset 0x1580 0x15b2: 0x35, // Block 0x57, offset 0x15c0 0x15f2: 0x1c, 0x15f4: 0x29, // Block 0x58, offset 0x1600 0x1600: 0x50, 0x1603: 0x51, 0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55, } // lookup returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *multiTrie) lookup(s []byte) (v uint64, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return t.ascii[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. 
case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = multiIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = multiIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = multiIndex[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } // lookupUnsafe returns the trie value for the first UTF-8 encoding in s. // s must start with a full and valid UTF-8 encoded rune. func (t *multiTrie) lookupUnsafe(s []byte) uint64 { c0 := s[0] if c0 < 0x80 { // is ASCII return t.ascii[c0] } i := t.utf8Start[c0] if c0 < 0xE0 { // 2-byte UTF-8 return t.lookupValue(uint32(i), s[1]) } i = multiIndex[uint32(i)<<6+uint32(s[1])] if c0 < 0xF0 { // 3-byte UTF-8 return t.lookupValue(uint32(i), s[2]) } i = multiIndex[uint32(i)<<6+uint32(s[2])] if c0 < 0xF8 { // 4-byte UTF-8 return t.lookupValue(uint32(i), s[3]) } return 0 } // lookupString returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. 
func (t *multiTrie) lookupString(s string) (v uint64, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return t.ascii[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = multiIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := t.utf8Start[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = multiIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = multiIndex[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s. // s must start with a full and valid UTF-8 encoded rune. func (t *multiTrie) lookupStringUnsafe(s string) uint64 { c0 := s[0] if c0 < 0x80 { // is ASCII return t.ascii[c0] } i := t.utf8Start[c0] if c0 < 0xE0 { // 2-byte UTF-8 return t.lookupValue(uint32(i), s[1]) } i = multiIndex[uint32(i)<<6+uint32(s[1])] if c0 < 0xF0 { // 3-byte UTF-8 return t.lookupValue(uint32(i), s[2]) } i = multiIndex[uint32(i)<<6+uint32(s[2])] if c0 < 0xF8 { // 4-byte UTF-8 return t.lookupValue(uint32(i), s[3]) } return 0 } // multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e. 
type multiTrie struct { ascii []uint64 // index for ASCII bytes utf8Start []uint8 // index for UTF-8 bytes >= 0xC0 } func newMultiTrie(i int) *multiTrie { h := multiTrieHandles[i] return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]} } type multiTrieHandle struct { ascii, multi uint8 } // multiTrieHandles: 5 handles, 10 bytes var multiTrieHandles = [5]multiTrieHandle{ {0, 0}, // 8c1e77823143d35c: all {0, 23}, // 8fb58ff8243b45b0: ASCII only {0, 23}, // 8fb58ff8243b45b0: ASCII only 2 {0, 24}, // 2ccc43994f11046f: BMP only {30, 25}, // ce448591bdcb4733: No BMP } // lookupValue determines the type of block n and looks up the value for b. func (t *multiTrie) lookupValue(n uint32, b byte) uint64 { switch { default: return uint64(multiValues[n<<6+uint32(b)]) } } // multiValues: 32 blocks, 2048 entries, 16384 bytes // The third block is the zero block. var multiValues = [2048]uint64{ // Block 0x0, offset 0x0 0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10, 0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551, 0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884, 0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8, 0x3f: 0x4fd3bcfa72bce8b0, // Block 0x1, offset 0x40 0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357, 0x7f: 0x782caa2d25a418a9, // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4, // Block 0x4, offset 0x100 0x13f: 0x56f8c4c82f5962dc, // Block 0x5, offset 0x140 0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d, // Block 0x6, offset 0x180 0x1bf: 0x7bf4d0ebf302a088, // Block 0x7, offset 0x1c0 0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7, // Block 0x8, offset 0x200 0x23f: 0x5de81c1dff6bf29d, // Block 0x9, offset 0x240 0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3, // Block 0xa, offset 0x280 0x2bf: 0x6a28f01979cbf059, // Block 0xb, offset 0x2c0 0x2c0: 
0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c, // Block 0xc, offset 0x300 0x33f: 0x5a10ffa9e29184fb, // Block 0xd, offset 0x340 0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79, // Block 0xe, offset 0x380 0x3bf: 0x74071288fff39c76, // Block 0xf, offset 0x3c0 0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849, // Block 0x10, offset 0x400 0x43f: 0x5676a62fd49c6bec, // Block 0x11, offset 0x440 0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f, // Block 0x12, offset 0x480 0x4bf: 0x69d6f0fe711fafc9, // Block 0x13, offset 0x4c0 0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02, // Block 0x14, offset 0x500 0x53f: 0xe03b31814c95f8b, // Block 0x15, offset 0x540 0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc, // Block 0x16, offset 0x580 0x5bf: 0x3c02ea92fb168559, // Block 0x17, offset 0x5c0 0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645, // Block 0x18, offset 0x600 0x63f: 0x3bb2ed2a72748f4b, // Block 0x19, offset 0x640 0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6, // Block 0x1a, offset 0x680 0x6bf: 0x352711cfb7236418, // Block 0x1b, offset 0x6c0 0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1, // Block 0x1c, offset 0x700 0x73f: 0x7191a77b28d23110, // Block 0x1d, offset 0x740 0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de, // Block 0x1e, offset 0x780 // Block 0x1f, offset 0x7c0 } // multiIndex: 29 blocks, 1856 entries, 1856 bytes // Block 0 is the zero block. 
var multiIndex = [1856]uint8{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04, 0xc8: 0x05, 0xcf: 0x06, 0xd0: 0x07, 0xdf: 0x08, 0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07, 0xe8: 0x08, 0xef: 0x09, 0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17, // Block 0x4, offset 0x100 0x120: 0x09, 0x13f: 0x0a, // Block 0x5, offset 0x140 0x140: 0x0b, 0x17f: 0x0c, // Block 0x6, offset 0x180 0x180: 0x0d, // Block 0x7, offset 0x1c0 0x1ff: 0x0e, // Block 0x8, offset 0x200 0x200: 0x0f, // Block 0x9, offset 0x240 0x27f: 0x10, // Block 0xa, offset 0x280 0x280: 0x11, // Block 0xb, offset 0x2c0 0x2ff: 0x12, // Block 0xc, offset 0x300 0x300: 0x13, // Block 0xd, offset 0x340 0x37f: 0x14, // Block 0xe, offset 0x380 0x380: 0x15, // Block 0xf, offset 0x3c0 0x3ff: 0x16, // Block 0x10, offset 0x400 0x410: 0x0a, 0x41f: 0x0b, 0x420: 0x0c, 0x43f: 0x0d, // Block 0x11, offset 0x440 0x440: 0x17, // Block 0x12, offset 0x480 0x4bf: 0x18, // Block 0x13, offset 0x4c0 0x4c0: 0x0f, 0x4ff: 0x10, // Block 0x14, offset 0x500 0x500: 0x19, // Block 0x15, offset 0x540 0x540: 0x12, // Block 0x16, offset 0x580 0x5bf: 0x1a, // Block 0x17, offset 0x5c0 0x5ff: 0x14, // Block 0x18, offset 0x600 0x600: 0x1b, // Block 0x19, offset 0x640 0x640: 0x16, // Block 0x1a, offset 0x680 // Block 0x1b, offset 0x6c0 0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04, 0x6c8: 0x05, 0x6cf: 0x06, 0x6d0: 0x07, 0x6df: 0x08, 0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07, 0x6e8: 0x08, 0x6ef: 0x09, // Block 0x1c, offset 0x700 0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17, } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/gen_test.go000066400000000000000000000032141515060771000303100ustar00rootroot00000000000000// Copyright 2014 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build generate package triegen_test // The code in this file generates captures and writes the tries generated in // the examples to data_test.go. To invoke it, run: // go test -tags=generate // // Making the generation code a "test" allows us to link in the necessary test // code. import ( "log" "os" "os/exec" ) func init() { const tmpfile = "tmpout" const dstfile = "data_test.go" f, err := os.Create(tmpfile) if err != nil { log.Fatalf("Could not create output file: %v", err) } defer os.Remove(tmpfile) defer f.Close() // We exit before this function returns, regardless of success or failure, // so there's no need to save (and later restore) the existing genWriter // value. genWriter = f f.Write([]byte(header)) Example_build() ExampleGen_build() if err := exec.Command("gofmt", "-w", tmpfile).Run(); err != nil { log.Fatal(err) } os.Remove(dstfile) os.Rename(tmpfile, dstfile) os.Exit(0) } const header = `// This file is generated with "go test -tags generate". DO NOT EDIT! // +build !generate package triegen_test ` // Stubs for generated tries. These are needed as we exclude data_test.go if // the generate flag is set. This will clearly make the tests fail, but that // is okay. It allows us to bootstrap. type trie struct{} func (t *trie) lookupString(string) (uint8, int) { return 0, 1 } func (t *trie) lookupStringUnsafe(string) uint64 { return 0 } func newRandTrie(i int) *trie { return &trie{} } func newMultiTrie(i int) *trie { return &trie{} } golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/print.go000066400000000000000000000150671515060771000276450ustar00rootroot00000000000000// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package triegen import ( "bytes" "fmt" "io" "strings" "text/template" ) // print writes all the data structures as well as the code necessary to use the // trie to w. func (b *builder) print(w io.Writer) error { b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize // If we only have one root trie, all starter blocks are at position 0 and // we can access the arrays directly. if len(b.Trie) == 1 { // At this point we cannot refer to the generated tables directly. b.ASCIIBlock = b.Name + "Values" b.StarterBlock = b.Name + "Index" } else { // Otherwise we need to have explicit starter indexes in the trie // structure. b.ASCIIBlock = "t.ascii" b.StarterBlock = "t.utf8Start" } b.SourceType = "[]byte" if err := lookupGen.Execute(w, b); err != nil { return err } if err := trieGen.Execute(w, b); err != nil { return err } for _, c := range b.Compactions { if err := c.c.Print(w); err != nil { return err } } return nil } func printValues(n int, values []uint64) string { w := &bytes.Buffer{} boff := n * blockSize fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) var newline bool for i, v := range values { if i%6 == 0 { newline = true } if v != 0 { if newline { fmt.Fprintf(w, "\n") newline = false } fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) } } return w.String() } func printIndex(b *builder, nr int, n *node) string { w := &bytes.Buffer{} boff := nr * blockSize fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) var newline bool for i, c := range n.children { if i%8 == 0 { newline = true } if c != nil { v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) if v != 0 { if newline { fmt.Fprintf(w, "\n") newline = false } fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) } } } return w.String() } var ( trieGen = 
template.Must(template.New("trie").Funcs(template.FuncMap{ "printValues": printValues, "printIndex": printIndex, "title": strings.Title, "dec": func(x int) int { return x - 1 }, "psize": func(n int) string { return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) }, }).Parse(trieTemplate)) lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) ) // TODO: consider the return type of lookup. It could be uint64, even if the // internal value type is smaller. We will have to verify this with the // performance of unicode/norm, which is very sensitive to such changes. const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} // {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. type {{.Name}}Trie struct { {{if $multi}} ascii []{{.ValueType}} // index for ASCII bytes utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 {{end}}} // func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} // h := {{.Name}}TrieHandles[i] // return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } // } // // type {{.Name}}TrieHandle struct { // ascii, multi {{.IndexType}} // } // // // {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes // var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ // {{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} // {{end}}}{{else}} // return &{{.Name}}Trie{} // } {{end}} // lookupValue determines the type of block n and looks up the value for b. 
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { switch { {{range $i, $c := .Compactions}} {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} n -= {{$c.Offset}}{{end}} return {{print $b.ValueType}}({{$c.Handler}}){{end}} } } // {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes // The third block is the zero block. var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { {{range $i, $v := .ValueBlocks}}{{printValues $i $v}} {{end}}} // {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes // Block 0 is the zero block. var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { {{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} {{end}}} ` // TODO: consider allowing zero-length strings after evaluating performance with // unicode/norm. const lookupTemplate = ` // lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return {{.ASCIIBlock}}[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := {{.StarterBlock}}[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := {{.StarterBlock}}[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. 
} o := uint32(i)<<6 + uint32(c1) i = {{.Name}}Index[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := {{.StarterBlock}}[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = {{.Name}}Index[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = {{.Name}}Index[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. } return t.lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } ` golang-github-clipperhouse-displaywidth-0.11.0+ds/internal/gen/triegen/triegen.go000066400000000000000000000344741515060771000301510ustar00rootroot00000000000000// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package triegen implements a code generator for a trie for associating // unsigned integer values with UTF-8 encoded runes. // // Many of the go.text packages use tries for storing per-rune information. A // trie is especially useful if many of the runes have the same value. If this // is the case, many blocks can be expected to be shared allowing for // information on many runes to be stored in little space. // // As most of the lookups are done directly on []byte slices, the tries use the // UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to // runes and contributes a little bit to better performance. It also naturally // provides a fast path for ASCII. // // Space is also an issue. There are many code points defined in Unicode and as // a result tables can get quite large. So every byte counts. 
The triegen // package automatically chooses the smallest integer values to represent the // tables. Compacters allow further compression of the trie by allowing for // alternative representations of individual trie blocks. // // triegen allows generating multiple tries as a single structure. This is // useful when, for example, one wants to generate tries for several languages // that have a lot of values in common. Some existing libraries for // internationalization store all per-language data as a dynamically loadable // chunk. The go.text packages are designed with the assumption that the user // typically wants to compile in support for all supported languages, in line // with the approach common to Go to create a single standalone binary. The // multi-root trie approach can give significant storage savings in this // scenario. // // triegen generates both tables and code. The code is optimized to use the // automatically chosen data types. The following code is generated for a Trie // or multiple Tries named "foo": // // - type fooTrie // The trie type. // // - func newFooTrie(x int) *fooTrie // Trie constructor, where x is the index of the trie passed to Gen. // // - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) // The lookup method, where uintX is automatically chosen. // // - func lookupString, lookupUnsafe and lookupStringUnsafe // Variants of the above. // // - var fooValues and fooIndex and any tables generated by Compacters. // The core trie data. // // - var fooTrieHandles // Indexes of starter blocks in case of multiple trie roots. // // It is recommended that users test the generated trie by checking the returned // value for every rune. Such exhaustive tests are possible as the number of // runes in Unicode is limited. package triegen // import "golang.org/x/text/internal/triegen" // TODO: Arguably, the internally optimized data types would not have to be // exposed in the generated API. 
// We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.

import (
	"encoding/binary"
	"fmt"
	"hash/crc64"
	"io"
	"log"
	"unicode/utf8"
)

// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
//
// The exported fields are read by the code-generation templates.
type builder struct {
	// Name is the prefix of the generated identifiers (nameTrie, nameValues,
	// nameIndex).
	Name string

	// ValueType is the type of the trie values looked up.
	ValueType string

	// ValueSize is the byte size of the ValueType.
	ValueSize int

	// IndexType is the type of trie index values used for all UTF-8 bytes of
	// a rune except the last one.
	IndexType string

	// IndexSize is the byte size of the IndexType.
	IndexSize int

	// SourceType is used when generating the lookup functions. If the user
	// requests StringSupport, all lookup functions will be generated for
	// string input as well.
	SourceType string

	// Trie holds the root tries handed to Gen.
	Trie []*Trie

	// IndexBlocks and ValueBlocks are the blocks written to the generated
	// index and value tables, in output order.
	IndexBlocks []*node
	ValueBlocks [][]uint64
	// Compactions lists the block encodings in use. Gen installs the plain
	// value-table encoding as entry 0; Compact options append further ones.
	Compactions []compaction
	// Checksum is the sum of the checksums of all tries built so far.
	Checksum uint64

	// ASCIIBlock and StarterBlock are the expressions the lookup template uses
	// to reach the ASCII values and the starter-byte index; print sets them.
	ASCIIBlock   string
	StarterBlock string

	// The maps below go from the hash of a block to the position it was
	// assigned, so that identical blocks are stored only once.
	indexBlockIdx map[uint64]int
	valueBlockIdx map[uint64]nodeIndex
	asciiBlockIdx map[uint64]int

	// Stats are used to fill out the template.
	Stats struct {
		NValueEntries int
		NValueBytes   int
		NIndexEntries int
		NIndexBytes   int
		NHandleBytes  int
	}

	// err is the first error recorded through setError.
	err error
}

// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
	compaction int
	index      int
}

// compaction keeps track of stats used for the compaction.
type compaction struct {
	// c is the Compacter that sizes, stores and prints the blocks.
	c Compacter
	// maxHandle is the largest handle c.Store has returned so far.
	maxHandle uint32
	// totalSize accumulates the sizes of the blocks assigned to c.
	totalSize int

	// Used by template-based generator and thus exported.
	Cutoff  uint32
	Offset  uint32
	Handler string
}

// setError records err unless an earlier error has already been recorded.
func (b *builder) setError(err error) {
	if b.err == nil {
		b.err = err
	}
}

// An Option can be passed to Gen.
// Options run in the order given, before the trie is built; the first one that
// returns an error aborts generation.
type Option func(b *builder) error

// Compact returns an Option that registers c as an additional block encoding.
// The handler expression is read from c when the option is applied.
func Compact(c Compacter) Option {
	return func(b *builder) error {
		entry := compaction{
			c:       c,
			Handler: c.Handler() + "(n, b)",
		}
		b.Compactions = append(b.Compactions, entry)
		return nil
	}
}

// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
	b := &builder{
		Name: name,
		Trie: tries,

		// The index starts with two dummy blocks followed by the zero block.
		// That places the zero block at offset 0x80, so its offset for
		// continuation bytes is 0.
		IndexBlocks: []*node{new(node), new(node), new(node)},

		// Entry 0 is the plain value table.
		Compactions: []compaction{{Handler: name + "Values[n<<6+uint32(b)]"}},

		// Key 0 in indexBlockIdx and valueBlockIdx is the hash of the zero
		// block.
		indexBlockIdx: map[uint64]int{0: 0},
		valueBlockIdx: map[uint64]nodeIndex{0: {}},
		asciiBlockIdx: map[uint64]int{},
	}
	b.Compactions[0].c = (*simpleCompacter)(b)

	for _, apply := range opts {
		if err = apply(b); err != nil {
			return 0, err
		}
	}

	if b.build(); b.err != nil {
		return 0, b.err
	}
	if err = b.print(w); err != nil {
		return 0, err
	}
	return b.Size(), nil
}

// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
	root *node

	hiddenTrie
}

// hiddenTrie carries the fields that the template generator must be able to
// read but that should stay out of the API documentation.
type hiddenTrie struct {
	Name         string
	Checksum     uint64
	ASCIIIndex   int
	StarterIndex int
}

// NewTrie returns a new trie root.
func NewTrie(name string) *Trie { return &Trie{ &node{ children: make([]*node, blockSize), values: make([]uint64, utf8.RuneSelf), }, hiddenTrie{Name: name}, } } // Gen is a convenience wrapper around the Gen func passing t as the only trie // and uses the name passed to NewTrie. It returns the size of the generated // tables. func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { return Gen(w, t.Name, []*Trie{t}, opts...) } // node is a node of the intermediate trie structure. type node struct { // children holds this node's children. It is always of length 64. // A child node may be nil. children []*node // values contains the values of this node. If it is non-nil, this node is // either a root or leaf node: // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. values []uint64 index nodeIndex } // Insert associates value with the given rune. Insert will panic if a non-zero // value is passed for an invalid rune. func (t *Trie) Insert(r rune, value uint64) { if value == 0 { return } s := string(r) if []rune(s)[0] != r && value != 0 { // Note: The UCD tables will always assign what amounts to a zero value // to a surrogate. Allowing a zero value for an illegal rune allows // users to iterate over [0..MaxRune] without having to explicitly // exclude surrogates, which would be tedious. panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) } if len(s) == 1 { // It is a root node value (ASCII). 
t.root.values[s[0]] = value return } n := t.root for ; len(s) > 1; s = s[1:] { if n.children == nil { n.children = make([]*node, blockSize) } p := s[0] % blockSize c := n.children[p] if c == nil { c = &node{} n.children[p] = c } if len(s) > 2 && c.values != nil { log.Fatalf("triegen: insert(%U): found internal node with values", r) } n = c } if n.values == nil { n.values = make([]uint64, blockSize) } if n.children != nil { log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) } n.values[s[0]-0x80] = value } // Size returns the number of bytes the generated trie will take to store. It // needs to be exported as it is used in the templates. func (b *builder) Size() int { // Index blocks. sz := len(b.IndexBlocks) * blockSize * b.IndexSize // Skip the first compaction, which represents the normal value blocks, as // its totalSize does not account for the ASCII blocks, which are managed // separately. sz += len(b.ValueBlocks) * blockSize * b.ValueSize for _, c := range b.Compactions[1:] { sz += c.totalSize } // TODO: this computation does not account for the fixed overhead of a using // a compaction, either code or data. As for data, though, the typical // overhead of data is in the order of bytes (2 bytes for cases). Further, // the savings of using a compaction should anyway be substantial for it to // be worth it. // For multi-root tries, we also need to account for the handles. if len(b.Trie) > 1 { sz += 2 * b.IndexSize * len(b.Trie) } return sz } func (b *builder) build() { // Compute the sizes of the values. var vmax uint64 for _, t := range b.Trie { vmax = maxValue(t.root, vmax) } b.ValueType, b.ValueSize = getIntType(vmax) // Compute all block allocations. // TODO: first compute the ASCII blocks for all tries and then the other // nodes. ASCII blocks are more restricted in placement, as they require two // blocks to be placed consecutively. 
// getIntType returns the name and byte size of the smallest unsigned integer
// type that can represent v.
func getIntType(v uint64) (string, int) {
	candidates := [...]struct {
		name  string
		bytes int
	}{
		{"uint8", 1},
		{"uint16", 2},
		{"uint32", 4},
	}
	for _, c := range candidates {
		// v fits when nothing remains above the candidate's bit width.
		if v>>(8*uint(c.bytes)) == 0 {
			return c.name, c.bytes
		}
	}
	return "uint64", 8
}
// computeOffsets assigns an index to n and, recursively, to all of its
// descendants, and returns a CRC-64 hash of the subtree rooted at n. The hash
// is 0 when n has neither children nor values.
//
// Blocks with equal hashes share one index: internal nodes are deduplicated
// through indexBlockIdx and leaf nodes through valueBlockIdx. A leaf that has
// not been seen before is handed to whichever compaction reports the smallest
// size for it, falling back to the plain value table (compaction 0).
//
// The root parameter is not used by the body.
func (b *builder) computeOffsets(n *node, root bool) uint64 {
	// For the first trie, the root lookup block will be at position 3, which is
	// the offset for UTF-8 non-ASCII starter bytes.
	first := len(b.IndexBlocks) == rootBlockOffset
	if first {
		b.IndexBlocks = append(b.IndexBlocks, n)
	}

	// We special-case the cases where all values recursively are 0. This allows
	// for the use of a zero block to which all such values can be directed.
	hash := uint64(0)
	if n.children != nil || n.values != nil {
		hasher := crc64.New(crcTable)
		// Children are processed first, so the hash of this node covers the
		// hashes of everything below it. A nil child contributes 0.
		for _, c := range n.children {
			var v uint64
			if c != nil {
				v = b.computeOffsets(c, false)
			}
			_ = binary.Write(hasher, binary.BigEndian, v)
		}
		_ = binary.Write(hasher, binary.BigEndian, n.values)
		hash = hasher.Sum64()
	}

	if first {
		// The block was already appended above; register it so that the lookup
		// below finds it instead of appending it a second time.
		b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
	}

	// Compacters don't apply to internal nodes.
	if n.children != nil {
		v, ok := b.indexBlockIdx[hash]
		if !ok {
			// Stored indexes are relative to blockOffset.
			v = len(b.IndexBlocks) - blockOffset
			b.IndexBlocks = append(b.IndexBlocks, n)
			b.indexBlockIdx[hash] = v
		}
		n.index = nodeIndex{0, v}
	} else {
		h, ok := b.valueBlockIdx[hash]
		if !ok {
			// Start from the cost of an uncompressed block and keep the
			// compaction that reports a strictly smaller size.
			bestI, bestSize := 0, blockSize*b.ValueSize
			for i, c := range b.Compactions[1:] {
				if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
					// i counts from Compactions[1], hence the +1.
					bestI, bestSize = i+1, sz
				}
			}
			c := &b.Compactions[bestI]
			c.totalSize += bestSize
			v := c.c.Store(n.values)
			// build derives the compaction offsets from the largest handle.
			if c.maxHandle < v {
				c.maxHandle = v
			}
			h = nodeIndex{bestI, int(v)}
			b.valueBlockIdx[hash] = h
		}
		n.index = h
	}
	return hash
}
// Entry i of this slice pairs with the constant of value i+1 declared below.
var PropertyDefinitions = []PropertyDefinition{
	{"Zero_Width", "Always 0 width, includes combining marks, control characters, non-printable, etc"},
	{"Wide", "Always 2 wide (East Asian Wide F/W, Emoji, Regional Indicator)"},
	{"East_Asian_Ambiguous", "Width depends on EastAsianWidth option"},
}

// these constants are used to build the property bitmap, internally.
// the external properties are above. Keep them in the same order!
//
// The values are consecutive and start at 1 (iota + 1), which leaves the zero
// value of property free to mean that none of them applies.
const (
	// ZWSP, ZWJ, ZWNJ, etc.
	zero_Width property = iota + 1
	// F, W (East Asian Wide), Emoji, Regional Indicator
	wide
	// A (East Asian Ambiguous)
	east_Asian_Ambiguous
)
// downloadFile fetches url and stores the response body at filepath.
//
// If filepath already exists the download is skipped and nil is returned.
// A response status other than 200 is reported as an error.
//
// The body is first written to a sibling file with a ".part" suffix and only
// renamed to filepath once it has been received and flushed completely. An
// interrupted transfer therefore never leaves a truncated file behind that a
// later run would accept as an already downloaded copy.
func downloadFile(url, filepath string) error {
	// Check if file already exists
	if _, err := os.Stat(filepath); err == nil {
		fmt.Printf("File %s already exists, skipping download\n", filepath)
		return nil
	}

	fmt.Printf("Downloading %s...\n", url)

	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad status: %s", resp.Status)
	}

	partial := filepath + ".part"
	out, err := os.Create(partial)
	if err != nil {
		return err
	}

	_, err = io.Copy(out, resp.Body)
	// Close unconditionally, but report its error only if the copy succeeded:
	// a failed close can mean that buffered data never reached the disk.
	if closeErr := out.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(partial, filepath)
	}
	if err != nil {
		// Best-effort cleanup; the original error is the one worth reporting.
		os.Remove(partial)
		return err
	}

	fmt.Printf("Downloaded %s\n", filepath)
	return nil
}
strconv.ParseInt(rangeStr, 16, 32) if err != nil { continue } data.EastAsianWidth[rune(codepoint)] = widthStr } } return scanner.Err() } // parseEmojiData parses the emoji-data.txt file for Extended_Pictographic and Emoji_Presentation func parseEmojiData(filename string, data *UnicodeData) error { file, err := os.Open(filename) if err != nil { return err } defer file.Close() scanner := bufio.NewScanner(file) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" || strings.HasPrefix(line, "#") { continue } // Parse line format: ; # parts := strings.Split(line, ";") if len(parts) < 2 { continue } rangeStr := strings.TrimSpace(parts[0]) propertyStr := strings.TrimSpace(parts[1]) // Remove comments from property string if commentIndex := strings.Index(propertyStr, "#"); commentIndex != -1 { propertyStr = strings.TrimSpace(propertyStr[:commentIndex]) } var r1, r2 rune // Parse range if strings.Contains(rangeStr, "..") { // Range of codepoints rangeParts := strings.Split(rangeStr, "..") if len(rangeParts) != 2 { continue } start, err1 := strconv.ParseInt(rangeParts[0], 16, 32) end, err2 := strconv.ParseInt(rangeParts[1], 16, 32) if err1 != nil || err2 != nil { continue } r1, r2 = rune(start), rune(end) } else { // Single codepoint codepoint, err := strconv.ParseInt(rangeStr, 16, 32) if err != nil { continue } r1, r2 = rune(codepoint), rune(codepoint) } // Skip characters below 0xFF (ASCII range is handled specially) if r2 < 0xFF { continue } // Check if this is a Regional Indicator character (range 1F1E6..1F1FF) // Regional Indicator characters can appear with any property, but we identify them by range const regionalIndicatorStart = 0x1F1E6 const regionalIndicatorEnd = 0x1F1FF if r1 >= regionalIndicatorStart && r2 <= regionalIndicatorEnd { // Add all Regional Indicator characters to the RegionalIndicator map for r := r1; r <= r2; r++ { data.RegionalIndicator[r] = true } // Don't add them to ExtendedPictographic or EmojiPresentation maps continue } 
// We're only interested in Extended_Pictographic and Emoji_Presentation for non-Regional Indicator characters if propertyStr != "Extended_Pictographic" && propertyStr != "Emoji_Presentation" { continue } // Add to the appropriate map for r := r1; r <= r2; r++ { switch propertyStr { case "Extended_Pictographic": data.ExtendedPictographic[r] = true case "Emoji_Presentation": data.EmojiPresentation[r] = true } } } return scanner.Err() } // extractStdlibData extracts character properties from Go's unicode package func extractStdlibData(data *UnicodeData) { // Extract control characters // Skip 0x00-0x1F and 0x7F as they're handled by the fast path in width.go // Only add C1 controls (0x80-0x9F) which are multi-byte in UTF-8 for r := rune(0x80); r <= 0x9F; r++ { data.ControlChars[r] = true // C1 controls } // Extract combining marks using range tables for efficiency // Mn: Nonspacing_Mark, Me: Enclosing_Mark // Note: Mc (Spacing Mark) characters are excluded so they get default width 1 extractRunesFromRangeTable(unicode.Mn, data.CombiningMarks) extractRunesFromRangeTable(unicode.Me, data.CombiningMarks) // Cf (Other, format) is the official Unicode category for format characters // which are generally invisible and have zero width. extractRunesFromRangeTable(unicode.Cf, data.ZeroWidthChars) // Zl (Other, line separator) is the official Unicode category for line separator characters // which are generally invisible and have zero width. extractRunesFromRangeTable(unicode.Zl, data.ZeroWidthChars) // Zp (Other, paragraph separator) is the official Unicode category for paragraph separator characters // which are generally invisible and have zero width. 
extractRunesFromRangeTable(unicode.Zp, data.ZeroWidthChars) // Noncharacters (U+nFFFE and U+nFFFF) data.ZeroWidthChars[0xFFFE] = true data.ZeroWidthChars[0xFFFF] = true } // extractRunesFromRangeTable efficiently extracts all runes from a Unicode range table func extractRunesFromRangeTable(table *unicode.RangeTable, target map[rune]bool) { // Iterate over 16-bit ranges for _, r16 := range table.R16 { for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) { target[r] = true } } // Iterate over 32-bit ranges for _, r32 := range table.R32 { for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) { target[r] = true } } } func buildPropertyBitmap(r rune, data *UnicodeData) property { if data.CombiningMarks[r] { return zero_Width } if data.ControlChars[r] { return zero_Width } if data.ZeroWidthChars[r] { return zero_Width } // As a practical matter, we probably don't need separate properties for // Emoji and East Asian Wide, as I believe they lead to the same // result. I made this distinction for VS15 handling. However, // eventually I came to the conclusion that VS15 is a no-op for width // calculation. Keeping the distinction for now. // Check for Regional Indicator before emoji if data.RegionalIndicator[r] { return wide } if data.ExtendedPictographic[r] && data.EmojiPresentation[r] { return wide } if eaw, exists := data.EastAsianWidth[r]; exists { switch eaw { case "F", "W": return wide case "A": return east_Asian_Ambiguous // H (Halfwidth), Na (Narrow), and N (Neutral) are not stored // as they all result in width 1 (default behavior) } } return 0 } golang-github-clipperhouse-displaywidth-0.11.0+ds/options.go000066400000000000000000000023621515060771000241540ustar00rootroot00000000000000package displaywidth // Options allows you to specify the treatment of ambiguous East Asian // characters and ANSI escape sequences. type Options struct { // EastAsianWidth specifies whether to treat ambiguous East Asian characters // as width 1 or 2. 
When false (default), ambiguous East Asian characters // are treated as width 1. When true, they are width 2. EastAsianWidth bool // ControlSequences specifies whether to ignore 7-bit ECMA-48 escape sequences // when calculating the display width. When false (default), ANSI escape // sequences are treated as just a series of characters. When true, they are // treated as a single zero-width unit. ControlSequences bool // ControlSequences8Bit specifies whether to ignore 8-bit ECMA-48 escape sequences // when calculating the display width. When false (default), these are treated // as just a series of characters. When true, they are treated as a single // zero-width unit. ControlSequences8Bit bool } // DefaultOptions is the default options for the display width // calculation, which is EastAsianWidth false, ControlSequences false, and // ControlSequences8Bit false. var DefaultOptions = Options{ EastAsianWidth: false, ControlSequences: false, ControlSequences8Bit: false, } golang-github-clipperhouse-displaywidth-0.11.0+ds/terminal-test/000077500000000000000000000000001515060771000247175ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/terminal-test/main.go000066400000000000000000000055451515060771000262030ustar00rootroot00000000000000package main import ( "fmt" "os" "strings" "unicode/utf8" "github.com/clipperhouse/displaywidth" )

// main prints the width the displaywidth package computes for a handful of
// sample strings, followed by a visual alignment test for each one so the
// result can be compared by eye with what the terminal actually renders.
// With "--help" as the first argument it prints a usage message and exits.
func main() {
	// say prints each argument on its own line.
	say := func(lines ...string) {
		for _, l := range lines {
			fmt.Println(l)
		}
	}

	if len(os.Args) > 1 && os.Args[1] == "--help" {
		say(
			"Usage: terminal-test",
			"Tests the actual terminal display width of regional indicator symbols",
			"by using visual alignment tests.",
			"",
			"Run from a terminal to see visual alignment results.",
		)
		os.Exit(0)
	}

	say("=== Terminal Regional Indicator Width Test ===")
	fmt.Printf("TERM: %s\n", os.Getenv("TERM"))
	say("")

	// sample pairs a test string with the width the package reports for it.
	type sample struct {
		name  string
		char  string
		width int
	}

	inputs := [][2]string{
		{"Single Regional Indicator", "🇦"}, // U+1F1E6
		{"Regional Indicator Pair (flag)", "🇺🇸"}, // US flag
		{"Regular Emoji", "😀"},
		{"ASCII", "a"},
		{"CJK", "中"},
	}
	samples := make([]sample, 0, len(inputs))
	for _, in := range inputs {
		samples = append(samples, sample{
			name:  in[0],
			char:  in[1],
			width: displaywidth.String(in[1]),
		})
	}

	say("Package calculated widths:")
	for _, s := range samples {
		fmt.Printf(" %s (%s): %d columns\n", s.name, s.char, s.width)
	}
	say("")

	say(
		"=== Visual Alignment Tests ===",
		"Check if the markers align correctly with the characters.",
		"If aligned: terminal width matches package calculation.",
		"If misaligned: terminal rendering differs from package.",
		"",
	)
	for _, s := range samples {
		visualTest(s.char, s.name, s.width)
		say("")
	}

	say(
		"=== Summary ===",
		"Compare the visual alignment above.",
		"The '^' marker shows the START of the character.",
		"The 'x' marker shows the expected END position (start + width).",
		"If characters align with the markers, the package calculation is correct.",
	)
}

// visualTest prints one alignment test for char: a line with char sandwiched
// between two ten-digit rulers, a '^' under the column where char starts, an
// 'x' under the column where char should end if it occupies expectedWidth
// columns, and finally a summary of its byte length, rune count and
// expectedWidth. label is used only in the heading.
func visualTest(char string, label string, expectedWidth int) {
	const ruler = "0123456789"

	fmt.Printf("--- %s: %s ---\n", label, char)

	// The ruler is plain ASCII, so its byte length equals its column width.
	sandwich := ruler + char + ruler
	fmt.Println(sandwich)

	fmt.Println(strings.Repeat(" ", len(ruler)) + "^ (start)" + strings.Repeat(" ", len(ruler)-6))

	// The end marker is printed only when its offset does not exceed the byte
	// length of the sandwich line.
	if end := len(ruler) + expectedWidth; end <= len(sandwich) {
		fmt.Println(strings.Repeat(" ", end) + "x (expected end, width=" + fmt.Sprint(expectedWidth) + ")")
	}

	fmt.Printf(" UTF-8 bytes: %d | Runes: %d | Package width: %d\n", len(char), utf8.RuneCountInString(char), expectedWidth)
}

golang-github-clipperhouse-displaywidth-0.11.0+ds/testdata/000077500000000000000000000000001515060771000237405ustar00rootroot00000000000000golang-github-clipperhouse-displaywidth-0.11.0+ds/testdata/UTF-8-test.txt000066400000000000000000000543751515060771000262770ustar00rootroot00000000000000UTF-8 decoder capability and stress test ---------------------------------------- Markus Kuhn - 2015-08-28 - CC BY 4.0 This test file can help you examine, how your UTF-8 decoder handles various types of correct, malformed, or otherwise interesting UTF-8 sequences. This file is not meant to be a conformance test. It does not prescribe any particular outcome. Therefore, there is no way to "pass" or "fail" this test file, even though the text does suggest a preferable decoder behaviour at some places. Its aim is, instead, to help you think about, and test, the behaviour of your UTF-8 decoder on a systematic collection of unusual inputs. Experience so far suggests that most first-time authors of UTF-8 decoders find at least one serious problem in their decoder using this file. The test lines below cover boundary conditions, malformed UTF-8 sequences, as well as correctly encoded UTF-8 sequences of Unicode code points that should never occur in a correct UTF-8 file. According to ISO 10646-1:2000, sections D.7 and 2.3c, a device receiving UTF-8 shall interpret a "malformed sequence in the same way that it interprets a character that is outside the adopted subset" and "characters that are not within the adopted subset shall be indicated to the user" by a receiving device. One commonly used approach in UTF-8 decoders is to replace any malformed UTF-8 sequence by a replacement character (U+FFFD), which looks a bit like an inverted question mark, or a similar symbol.
It might be a good idea to visually distinguish a malformed UTF-8 sequence from a correctly encoded Unicode character that is just not available in the current font but otherwise fully legal, even though ISO 10646-1 doesn't mandate this. In any case, just ignoring malformed sequences or unavailable characters does not conform to ISO 10646, will make debugging more difficult, and can lead to user confusion. Please check, whether a malformed UTF-8 sequence is (1) represented at all, (2) represented by exactly one single replacement character (or equivalent signal), and (3) the following quotation mark after an illegal UTF-8 sequence is correctly displayed, i.e. proper resynchronization takes place immediately after any malformed sequence. This file says "THE END" in the last line, so if you don't see that, your decoder crashed somehow before, which should always be cause for concern. All lines in this file are exactly 79 characters long (plus the line feed). In addition, all lines end with "|", except for the two test lines 2.1.1 and 2.2.1, which contain non-printable ASCII controls U+0000 and U+007F. If you display this file with a fixed-width font, these "|" characters should all line up in column 79 (right margin). This allows you to test quickly, whether your UTF-8 decoder finds the correct number of characters in every line, that is whether each malformed sequences is replaced by a single replacement character. Note that, as an alternative to the notion of malformed sequence used here, it is also a perfectly acceptable (and in some situations even preferable) solution to represent each individual byte of a malformed sequence with a replacement character. If you follow this strategy in your decoder, then please ignore the "|" column. 
Here come the tests: | | 1 Some correct UTF-8 text | | You should see the Greek word 'kosme': "κόσμε" | | 2 Boundary condition test cases | | 2.1 First possible sequence of a certain length | | 2.1.1 1 byte (U-00000000): "" 2.1.2 2 bytes (U-00000080): "€" | 2.1.3 3 bytes (U-00000800): "à €" | 2.1.4 4 bytes (U-00010000): "ð€€" | 2.1.5 5 bytes (U-00200000): "øˆ€€€" | 2.1.6 6 bytes (U-04000000): "ü„€€€€" | | 2.2 Last possible sequence of a certain length | | 2.2.1 1 byte (U-0000007F): "" 2.2.2 2 bytes (U-000007FF): "ß¿" | 2.2.3 3 bytes (U-0000FFFF): "ï¿¿" | 2.2.4 4 bytes (U-001FFFFF): "÷¿¿¿" | 2.2.5 5 bytes (U-03FFFFFF): "û¿¿¿¿" | 2.2.6 6 bytes (U-7FFFFFFF): "ý¿¿¿¿¿" | | 2.3 Other boundary conditions | | 2.3.1 U-0000D7FF = ed 9f bf = "퟿" | 2.3.2 U-0000E000 = ee 80 80 = "" | 2.3.3 U-0000FFFD = ef bf bd = "�" | 2.3.4 U-0010FFFF = f4 8f bf bf = "ô¿¿" | 2.3.5 U-00110000 = f4 90 80 80 = "ô€€" | | 3 Malformed sequences | | 3.1 Unexpected continuation bytes | | Each unexpected continuation byte should be separately signalled as a | malformed sequence of its own. 
| | 3.1.1 First continuation byte 0x80: "€" | 3.1.2 Last continuation byte 0xbf: "¿" | | 3.1.3 2 continuation bytes: "€¿" | 3.1.4 3 continuation bytes: "€¿€" | 3.1.5 4 continuation bytes: "€¿€¿" | 3.1.6 5 continuation bytes: "€¿€¿€" | 3.1.7 6 continuation bytes: "€¿€¿€¿" | 3.1.8 7 continuation bytes: "€¿€¿€¿€" | | 3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf): | | "€‚ƒ„…†‡ˆ‰Š‹ŒŽ | ‘’“”•–—˜™š›œžŸ |  ¡¢£¤¥¦§¨©ª«¬­®¯ | °±²³´µ¶·¸¹º»¼½¾¿" | | 3.2 Lonely start characters | | 3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf), | each followed by a space character: | | "À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï | Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß " | | 3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef), | each followed by a space character: | | "à á â ã ä å æ ç è é ê ë ì í î ï " | | 3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7), | each followed by a space character: | | "ð ñ ò ó ô õ ö ÷ " | | 3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb), | each followed by a space character: | | "ø ù ú û " | | 3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd), | each followed by a space character: | | "ü ý " | | 3.3 Sequences with last continuation byte missing | | All bytes of an incomplete sequence should be signalled as a single | malformed sequence, i.e., you should see only a single replacement | character in each of the next 10 tests. 
(Characters as in section 2) | | 3.3.1 2-byte sequence with last byte missing (U+0000): "À" | 3.3.2 3-byte sequence with last byte missing (U+0000): "à€" | 3.3.3 4-byte sequence with last byte missing (U+0000): "ð€€" | 3.3.4 5-byte sequence with last byte missing (U+0000): "ø€€€" | 3.3.5 6-byte sequence with last byte missing (U+0000): "ü€€€€" | 3.3.6 2-byte sequence with last byte missing (U-000007FF): "ß" | 3.3.7 3-byte sequence with last byte missing (U-0000FFFF): "ï¿" | 3.3.8 4-byte sequence with last byte missing (U-001FFFFF): "÷¿¿" | 3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): "û¿¿¿" | 3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): "ý¿¿¿¿" | | 3.4 Concatenation of incomplete sequences | | All the 10 sequences of 3.3 concatenated, you should see 10 malformed | sequences being signalled: | | "Àà€ð€€ø€€€ü€€€€ßï¿÷¿¿û¿¿¿ý¿¿¿¿" | | 3.5 Impossible bytes | | The following two bytes cannot appear in a correct UTF-8 string | | 3.5.1 fe = "þ" | 3.5.2 ff = "ÿ" | 3.5.3 fe fe ff ff = "þþÿÿ" | | 4 Overlong sequences | | The following sequences are not malformed according to the letter of | the Unicode 2.0 standard. However, they are longer then necessary and | a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8 | decoder" should reject them just like malformed sequences for two | reasons: (1) It helps to debug applications if overlong sequences are | not treated as valid representations of characters, because this helps | to spot problems more quickly. (2) Overlong sequences provide | alternative representations of characters, that could maliciously be | used to bypass filters that check only for ASCII characters. For | instance, a 2-byte encoded line feed (LF) would not be caught by a | line counter that counts only 0x0a bytes, but it would still be | processed as a line feed by an unsafe UTF-8 decoder later in the | pipeline. 
From a security point of view, ASCII compatibility of UTF-8 | sequences means also, that ASCII characters are *only* allowed to be | represented by ASCII bytes in the range 0x00-0x7f. To ensure this | aspect of ASCII compatibility, use only "safe UTF-8 decoders" that | reject overlong UTF-8 sequences for which a shorter encoding exists. | | 4.1 Examples of an overlong ASCII character | | With a safe UTF-8 decoder, all of the following five overlong | representations of the ASCII character slash ("/") should be rejected | like a malformed UTF-8 sequence, for instance by substituting it with | a replacement character. If you see a slash below, you do not have a | safe UTF-8 decoder! | | 4.1.1 U+002F = c0 af = "À¯" | 4.1.2 U+002F = e0 80 af = "à€¯" | 4.1.3 U+002F = f0 80 80 af = "ð€€¯" | 4.1.4 U+002F = f8 80 80 80 af = "ø€€€¯" | 4.1.5 U+002F = fc 80 80 80 80 af = "ü€€€€¯" | | 4.2 Maximum overlong sequences | | Below you see the highest Unicode value that is still resulting in an | overlong sequence if represented with the given number of bytes. This | is a boundary test for safe UTF-8 decoders. All five characters should | be rejected like malformed UTF-8 sequences. | | 4.2.1 U-0000007F = c1 bf = "Á¿" | 4.2.2 U-000007FF = e0 9f bf = "àŸ¿" | 4.2.3 U-0000FFFF = f0 8f bf bf = "ð¿¿" | 4.2.4 U-001FFFFF = f8 87 bf bf bf = "ø‡¿¿¿" | 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "üƒ¿¿¿¿" | | 4.3 Overlong representation of the NUL character | | The following five sequences should also be rejected like malformed | UTF-8 sequences and should not be treated like the ASCII NUL | character. 
| | 4.3.1 U+0000 = c0 80 = "À€" | 4.3.2 U+0000 = e0 80 80 = "à€€" | 4.3.3 U+0000 = f0 80 80 80 = "ð€€€" | 4.3.4 U+0000 = f8 80 80 80 80 = "ø€€€€" | 4.3.5 U+0000 = fc 80 80 80 80 80 = "ü€€€€€" | | 5 Illegal code positions | | The following UTF-8 sequences should be rejected like malformed | sequences, because they never represent valid ISO 10646 characters and | a UTF-8 decoder that accepts them might introduce security problems | comparable to overlong UTF-8 sequences. | | 5.1 Single UTF-16 surrogates | | 5.1.1 U+D800 = ed a0 80 = "í €" | 5.1.2 U+DB7F = ed ad bf = "í­¿" | 5.1.3 U+DB80 = ed ae 80 = "í®€" | 5.1.4 U+DBFF = ed af bf = "í¯¿" | 5.1.5 U+DC00 = ed b0 80 = "í°€" | 5.1.6 U+DF80 = ed be 80 = "í¾€" | 5.1.7 U+DFFF = ed bf bf = "í¿¿" | | 5.2 Paired UTF-16 surrogates | | 5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = "𐀀" | 5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = "𐏿" | 5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = "í­¿í°€" | 5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = "í­¿í¿¿" | 5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = "󰀀" | 5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = "󰏿" | 5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = "􏰀" | 5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = "􏿿" | | 5.3 Noncharacter code positions | | The following "noncharacters" are "reserved for internal use" by | applications, and according to older versions of the Unicode Standard | "should never be interchanged". Unicode Corrigendum #9 dropped the | latter restriction. Nevertheless, their presence in incoming UTF-8 data | can remain a potential security risk, depending on what use is made of | these codes subsequently. Examples of such internal use: | | - Some file APIs with 16-bit characters may use the integer value -1 | = U+FFFF to signal an end-of-file (EOF) or error condition. | | - In some UTF-16 receivers, code point U+FFFE might trigger a | byte-swap operation (to convert between UTF-16LE and UTF-16BE). 
| | With such internal use of noncharacters, it may be desirable and safer | to block those code points in UTF-8 decoders, as they should never | occur legitimately in incoming UTF-8 data, and could trigger unsafe | behaviour in subsequent processing. | | Particularly problematic noncharacters in 16-bit applications: | | 5.3.1 U+FFFE = ef bf be = "￾" | 5.3.2 U+FFFF = ef bf bf = "ï¿¿" | | Other noncharacters: | | 5.3.3 U+FDD0 .. U+FDEF = "ï·ï·‘﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜ï·ï·žï·Ÿï· ï·¡ï·¢ï·£ï·¤ï·¥ï·¦ï·§ï·¨ï·©ï·ªï·«ï·¬ï·­ï·®ï·¯"| | 5.3.4 U+nFFFE U+nFFFF (for n = 1..10) | | "🿾🿿𯿾𯿿𿿾𿿿ñ¿¾ñ¿¿ñŸ¿¾ñŸ¿¿ñ¯¿¾ñ¯¿¿ñ¿¿¾ñ¿¿¿ò¿¾ò¿¿ | òŸ¿¾òŸ¿¿ò¯¿¾ò¯¿¿ò¿¿¾ò¿¿¿ó¿¾ó¿¿óŸ¿¾óŸ¿¿ó¯¿¾ó¯¿¿ó¿¿¾ó¿¿¿ô¿¾ô¿¿" | | THE END | golang-github-clipperhouse-displaywidth-0.11.0+ds/testdata/sample.txt000066400000000000000000001042221515060771000257630ustar00rootroot00000000000000Generated with help from Grok: https://grok.com/share/bGVnYWN5LWNvcHk%3D_6b7f7d01-4d33-4677-b81b-709f6a8c2ed6 and Cursor. ## English (Latin Script, mostly ASCII) - New York City New York City, often simply referred to as New York, is the most populous city in the United States. With a population of over 8.8 million people within its city limits and around 20 million in the metropolitan area, it stands as a global hub for finance, culture, media, and entertainment. The city is divided into five boroughs: Manhattan, Brooklyn, Queens, the Bronx, and Staten Island, each offering its unique flavor and attractions. One of the most iconic landmarks in New York is the Statue of Liberty, a gift from France symbolizing freedom and democracy. Located on Liberty Island in New York Harbor, it attracts millions of visitors annually who take ferries to see this colossal neoclassical sculpture up close. Another must-see is the Empire State Building, an Art Deco skyscraper that was once the tallest building in the world. From its observation deck on the 86th floor, one can enjoy panoramic views of the city skyline, especially stunning at sunset. 
Central Park, spanning 843 acres in the heart of Manhattan, serves as a green oasis amid the urban jungle. It's a popular spot for jogging, picnicking, and boating on the lake. The park also hosts events like concerts and Shakespeare in the Park performances during the summer. Times Square, known as "The Crossroads of the World," is famous for its bright neon lights, Broadway theaters, and the annual New Year's Eve ball drop, drawing crowds from around the globe. Sports play a big role in New York's culture. The New York Yankees, a Major League Baseball team, have won 27 World Series championships, more than any other team. They play at Yankee Stadium in the Bronx. Basketball fans root for the New York Knicks, who call Madison Square Garden home, a venue also used for hockey games by the New York Rangers. Football enthusiasts support the New York Giants and New York Jets, both playing at MetLife Stadium in nearby New Jersey. The city's diverse population contributes to its vibrant food scene. From street vendors selling hot dogs and pretzels to high-end restaurants offering cuisines from every corner of the world, there's something for every palate. Pizza slices, bagels with cream cheese, and cheesecake are quintessential New York treats. The subway system, with its 472 stations, is the lifeline of the city, transporting millions daily despite occasional delays. Education and innovation thrive here too. Institutions like Columbia University and New York University attract students globally. Wall Street in Lower Manhattan is the epicenter of the financial world, housing the New York Stock Exchange. Museums such as the Metropolitan Museum of Art and the Museum of Modern Art showcase priceless collections spanning centuries. New York is a city that never sleeps, with nightlife ranging from rooftop bars in Manhattan to live music venues in Brooklyn. Shopping destinations like Fifth Avenue offer luxury brands, while flea markets provide unique finds. 
The High Line, an elevated park built on a former rail line, offers a peaceful walk with art installations and gardens. Despite challenges like high living costs and traffic congestion, New York's energy and opportunities draw people from all walks of life. It's a melting pot where cultures blend, ideas flourish, and dreams are pursued relentlessly. Whether you're a tourist snapping photos at the Brooklyn Bridge or a local commuting to work, the city's pulse is infectious. ## French (Latin Script with Diacritics) - Paris Paris, souvent appelée «la Ville Lumière», est la capitale de la France et l'une des villes les plus visitées au monde. Avec une population d'environ 2,1 millions d'habitants intra-muros et plus de 12 millions dans l'agglomération, elle est un centre mondial pour l'art, la mode, la gastronomie et la culture. La ville est divisée en 20 arrondissements, chacun avec son caractère unique. L'un des monuments les plus iconiques est la Tour Eiffel, construite pour l'Exposition Universelle de 1889. Située sur le Champ de Mars, elle offre des vues époustouflantes depuis son sommet, surtout illuminée la nuit. Un autre incontournable est le Musée du Louvre, abritant des chefs-d'œuvre comme la Mona Lisa et la Vénus de Milo, attirant des millions de visiteurs chaque année. Les Champs-Élysées, avenue prestigieuse menant à l'Arc de Triomphe, est bordée de boutiques de luxe, cafés et cinémas. Le quartier de Montmartre, avec la Basilique du Sacré-Cœur, est connu pour ses artistes de rue et son ambiance bohème. La Seine traverse la ville, avec des ponts célèbres comme le Pont Neuf, idéal pour des croisières fluviales. En sports, le Paris Saint-Germain (PSG) domine le football français, jouant au Parc des Princes et comptant des stars internationales. Le rugby a le Stade Français, et le tennis brille avec Roland-Garros, tournoi du Grand Chelem. Le basket avec des équipes en Pro A, et le cyclisme avec le Tour de France finissant sur les Champs-Élysées. 
La cuisine française est renommée : croissants, baguettes, escargots, fromages et vins. Les marchés comme celui de la Rue Cler offrent des produits frais. Le métro parisien, avec ses 16 lignes et stations art nouveau, est efficace bien que bondé aux heures de pointe. Institutions comme la Sorbonne attirent étudiants du monde entier. Quartiers comme Le Marais mêlent histoire et modernité avec galeries et boutiques. Musées comme Orsay exposent impressionnistes. La vie nocturne à Pigalle ou Oberkampf inclut cabarets comme le Moulin Rouge. Paris affronte des défis comme le coût de la vie et le tourisme massif, mais des initiatives vertes comme Vélib' promeuvent les vélos. Jardins comme les Tuileries offrent repos. L'Opéra Garnier accueille ballets. En somme, Paris incarne l'élégance et le romantisme. ## Arabic (Arabic Script, RTL) - القاهرة القاهرة، عاصمة مصر، هي واحدة من أكبر المدن ÙÙŠ العالم العربي ÙˆØ§Ù„Ø£ÙØ±ÙŠÙ‚ÙŠ. مع سكان يبلغ عددهم حوالي 10 ملايين نسمة داخل المدينة وأكثر من 20 مليون ÙÙŠ المنطقة الحضرية، ØªÙØ¹ØªØ¨Ø± مركزاً ثقاÙياً وتاريخياً وسياسياً نابضاً. المدينة مقسمة إلى أحياء عديدة، كل منها يحمل طابعاً ÙØ±ÙŠØ¯Ø§Ù‹. من أبرز المعالم السياحية أهرامات الجيزة، التي تشمل هرم خوÙÙˆ الكبير، رمزاً للحضارة Ø§Ù„ÙØ±Ø¹ÙˆÙ†ÙŠØ© القديمة. تقع على هضبة الجيزة غرب النيل، وتجذب ملايين الزوار سنوياً لاستكشا٠عجائبها. كما يوجد المتح٠المصري ÙÙŠ ميدان التحرير، الذي يحتوي على كنوز توت عنخ آمون وتماثيل ÙØ±Ø¹ÙˆÙ†ÙŠØ©. ميدان التحرير هو قلب المدينة، حيث وقعت ثورة 2011ØŒ ويحيط به مبان٠حكومية ÙˆÙنادق. خان الخليلي، سوق تاريخي، مثالي للتسوق من المجوهرات والتوابل. نهر النيل يعبر المدينة، مع جسور مثل قصر النيل، مناسبة للنزهات المسائية. ÙÙŠ الرياضة، النادي الأهلي والزمالك هما الخصمان الأبديان ÙÙŠ كرة القدم، يلعبان ÙÙŠ استاد القاهرة الدولي. المنتخب المصري حقق كأس Ø£ÙØ±ÙŠÙ‚يا عدة مرات. كرة السلة مع أندية مثل الجزيرة، والكرة الطائرة شائعة أيضاً. سباقات الخيل ÙÙŠ نادي الجزيرة. المطبخ المصري شهي: كشري، Ùول مدمس، محشي، ÙˆÙƒÙ†Ø§ÙØ©. أسواق مثل عتبة ØªÙˆÙØ± مكونات طازجة. 
مترو القاهرة، أول ÙÙŠ Ø£ÙØ±ÙŠÙ‚يا، ينقل ملايين يومياً رغم الازدحام. مؤسسات مثل جامعة القاهرة تجذب طلاباً عربياً. أحياء مثل الزمالك تمزج Ø§Ù„ÙØ®Ø§Ù…Ø© بالتاريخ. متاح٠مثل متح٠الÙÙ† الإسلامي تعرض تحÙ. الحياة الليلية ÙÙŠ وسط البلد تشمل مقاهي ومطاعم. تواجه القاهرة تحديات مثل الزحام والتلوث، لكن مبادرات مثل متروبوليتان تساعد. حدائق مثل الأزهر ØªÙˆÙØ± راحة. دار الأوبرا تستضي٠حÙلات. باختصار، القاهرة تجسد التاريخ والحيوية. ## Chinese (Simplified Han Script) - 北京 åŒ—äº¬æ˜¯ä¸­å›½é¦–éƒ½ï¼Œæ˜¯ä¸–ç•Œä¸Šäººå£æœ€å¤šçš„城市之一。市区人å£çº¦2150万,都市区超过2500万,是政治ã€ç»æµŽå’Œæ–‡åŒ–中心。城市分为多个区,æ¯ä¸ªåŒºéƒ½æœ‰ç‹¬ç‰¹ç‰¹è‰²ã€‚ 最著å的地标是故宫,åˆç§°ç´«ç¦åŸŽï¼Œæ˜Žæ¸…王æœçš„皇宫,现在是åšç‰©é¦†ï¼Œå±•示数åƒä»¶æ–‡ç‰©ã€‚å¦ä¸€ä¸ªæ˜¯å¤©å®‰é—¨å¹¿åœºï¼Œä¸–界上最大的广场,举办国庆庆典。长城在北京郊区,如八达岭段,å¸å¼•游客攀登。 é¢å’Œå›­æ˜¯çš‡å®¶å›­æž—,å åœ°290å…¬é¡·ï¼Œé€‚åˆæ•£æ­¥å’Œåˆ’船。王府井大街是购物天堂,有å°åƒå¦‚åŒ—äº¬çƒ¤é¸­ã€‚èƒ¡åŒæ˜¯ä¼ ç»Ÿå··é“,ä¿ç•™è€åŒ—京风貌,骑自行车游览ä¸é”™ã€‚ 体育方é¢ï¼ŒåŒ—京国安是中超足çƒé˜Ÿï¼Œä¸»åœºå·¥äººä½“育场。北京首钢篮çƒé˜Ÿåœ¨CBA。奥è¿ä¼š2008åœ¨åŒ—äº¬ä¸¾åŠžï¼Œæ°´ç«‹æ–¹å’Œé¸Ÿå·¢æ˜¯æ ‡å¿—ã€‚ä¹’ä¹“çƒæ˜¯ä¸­å›½å›½çƒï¼ŒåŒ—京有许多俱ä¹éƒ¨ã€‚ 中国èœä¸°å¯Œï¼šé¥ºå­ã€å®«ä¿é¸¡ä¸ã€éº»å©†è±†è…。å°åƒè¡—如å—锣鼓巷æä¾›è¡—头美食。地é“系统有20多æ¡çº¿ï¼Œé«˜æ•ˆä½†é«˜å³°æœŸæ‹¥æŒ¤ã€‚ 机构如清åŽå¤§å­¦å¸å¼•å…¨çƒå­¦ç”Ÿã€‚CBD是商业区,高楼林立。åšç‰©é¦†å¦‚首都åšç‰©é¦†å±•出历å²ã€‚夜生活在三里屯,包括酒å§å’Œé¤åŽ…ã€‚ 北京é¢å¯¹é›¾éœ¾å’Œäº¤é€šé—®é¢˜ï¼Œä½†åœ°é“扩展帮助。公园如北海公园æä¾›ä¼‘闲。国家大剧院举办演出。总之,北京体现传统与现代èžåˆã€‚ ## Japanese (Hiragana, Katakana, Kanji) - æ±äº¬ æ±äº¬ã¯æ—¥æœ¬ã®é¦–都ã§ã€ä¸–ç•Œã§æœ€ã‚‚人å£ã®å¤šã„都市ã®ä¸€ã¤ã§ã™ã€‚市区人å£ç´„1400万人ã€éƒ½å¸‚åœã§3700万人以上ã§ã€æ”¿æ²»ã€çµŒæ¸ˆã€æ–‡åŒ–ã®ä¸­å¿ƒã§ã™ã€‚23ã®ç‰¹åˆ¥åŒºã«åˆ†ã‹ã‚Œã€ãれãžã‚Œç‹¬è‡ªã®é­…力ãŒã‚りã¾ã™ã€‚ 最も有åãªãƒ©ãƒ³ãƒ‰ãƒžãƒ¼ã‚¯ã¯æ±äº¬ã‚¿ãƒ¯ãƒ¼ã€ãƒ†ãƒ¬ãƒ“å¡”ã§å¤œæ™¯ãŒç¾Žã—ã„。もã†ä¸€ã¤ã¯æµ…è‰å¯ºã€æ±äº¬ã®å¤ã„寺院ã§ã€é›·é–€ãŒè±¡å¾´çš„。渋谷ã®ã‚¹ã‚¯ãƒ©ãƒ³ãƒ–ル交差点ã¯ä¸–ç•Œä¸€ã®æ­©è¡Œè€…æ•°ã§çŸ¥ã‚‰ã‚Œã¾ã™ã€‚ 皇居ã¯å¤©çš‡ã®ä½å±…ã€å‘¨å›²ã¯ã‚¸ãƒ§ã‚®ãƒ³ã‚°ã‚³ãƒ¼ã‚¹ã€‚秋葉原ã¯ã‚¢ãƒ‹ãƒ¡ã¨é›»å­æ©Ÿå™¨ã®è–地。六本木ã¯ç¾ä»£ã‚¢ãƒ¼ãƒˆã¨é«˜å±¤ãƒ“ルã§ã€æ£®ç¾Žè¡“館ãŒã‚りã¾ã™ã€‚ スãƒãƒ¼ãƒ„ã§ã¯ã€èª­å£²ã‚¸ãƒ£ã‚¤ã‚¢ãƒ³ãƒ„ãŒãƒ—ロ野çƒã®äººæ°—ãƒãƒ¼ãƒ 
ã€æ±äº¬ãƒ‰ãƒ¼ãƒ ã§ãƒ—レー。サッカーã®FCæ±äº¬ã¨æµ¦å’Œãƒ¬ãƒƒã‚ºã®ãƒ©ã‚¤ãƒãƒ«ã€‚相撲ã¯ä¸¡å›½å›½æŠ€é¤¨ã§ã€ã‚ªãƒªãƒ³ãƒ”ック2020ã®ä¼šå ´ã‚‚残りã¾ã™ã€‚ 日本食ã¯å¯¿å¸ã€å¤©ã·ã‚‰ã€ãƒ©ãƒ¼ãƒ¡ãƒ³ã€‚築地市場跡ã®è±Šæ´²å¸‚å ´ã§æ–°é®®é­šã€‚地下鉄ã¯13ç·šã€åŠ¹çŽ‡çš„ã ãŒãƒ©ãƒƒã‚·ãƒ¥æ™‚混雑。 機関如æ±äº¬å¤§å­¦ãŒå­¦ç”Ÿã‚’集ã‚。新宿ã¯ãƒ“ジãƒã‚¹åŒºã€‚åšç‰©é¤¨å¦‚国立åšç‰©é¤¨å±•示歴å²ã€‚夜éŠã³ã¯éŠ€åº§ã®ãƒãƒ¼ã€‚ æ±äº¬ã¯åœ°éœ‡ã¨é«˜ç‰©ä¾¡ã®èª²é¡Œã ãŒã€åœ°éœ‡å¯¾ç­–é€²ã‚€ã€‚å…¬åœ’å¦‚ä¸Šé‡Žå…¬åœ’ã§æ¡œè¦‹ã€‚国立劇場ã§å…¬æ¼”。ã¾ã¨ã‚ã€æ±äº¬ã¯ä¼çµ±ã¨é©æ–°ã®èžåˆã€‚ ## Korean (Hangul Script) - 서울 ì„œìš¸ì€ í•œêµ­ì˜ ìˆ˜ë„로, 세계ì—서 가장 ì¸êµ¬ê°€ ë§Žì€ ë„시 중 하나입니다. ë„시 ì¸êµ¬ 약 1000ë§Œ 명, 수ë„ê¶Œ 2500ë§Œ 명 ì´ìƒìœ¼ë¡œ, 정치, 경제, 문화 중심입니다. 25ê°œì˜ êµ¬ë¡œ 나뉘어 ê°ìž ë…특한 ë§¤ë ¥ì„ ê°€ì§‘ë‹ˆë‹¤. 가장 유명한 랜드마í¬ëŠ” 경복ê¶, ì¡°ì„  ì™•ì¡°ì˜ ê¶ì „으로 ì—­ì‚¬ì  ê°€ì¹˜ê°€ 높습니다. ë˜ í•˜ë‚˜ëŠ” 남산타워, ì„œìš¸ì˜ ì „ë§ì„ ì¦ê¸¸ 수 있는 곳입니다. 명ë™ì€ 쇼핑과 거리 ìŒì‹ìœ¼ë¡œ 유명합니다. í•œê°•ì€ ë„시를 가로지르며, ìžì „ê±° ë„로와 ê³µì›ì´ ë§Žì•„ ì‚°ì±…ì— ì¢‹ìŠµë‹ˆë‹¤. ì¸ì‚¬ë™ì€ 전통 시장으로, 공예품과 차를 팔아요. í™ëŒ€ëŠ” 거리 공연과 ì Šì€ ë¬¸í™”ì˜ ì¤‘ì‹¬ìž…ë‹ˆë‹¤. 스í¬ì¸ ì—서 FC ì„œìš¸ì´ K리그 축구 팀, 잠실 경기장ì—서 플레ì´í•©ë‹ˆë‹¤. 야구는 ë‘ì‚° 베어스와 LG 트윈스 ë¼ì´ë²Œ. ë†êµ¬ KBL 팀들, 올림픽 1988 유ì ë„ 있습니다. 한국 ìŒì‹ì€ 비빔밥, 김치찌개, 불고기. í™ëŒ€ë‚˜ ì´íƒœì›ì—서 글로벌 요리. 지하철 9호선 ì´ìƒ, 효율ì ì´ì§€ë§Œ 출퇴근 혼잡. 기관처럼 ì„œìš¸ëŒ€í•™êµ í•™ìƒ ìœ ì¹˜. ê°•ë‚¨ì€ ë¹„ì¦ˆë‹ˆìŠ¤ 구역. 박물관 국가박물관 역사 전시. 밤문화 ì´íƒœì› ë°”. ì„œìš¸ì€ êµí†µê³¼ 미세먼지 문제 ì§ë©´í•˜ì§€ë§Œ, 버스 ê°œí˜ ë„움. ê³µì› ì—¬ì˜ë„ 휴ì‹. 오페ë¼í•˜ìš°ìФ 공연. 요약, 서울 전통과 현대 융합. ## Russian (Cyrillic Script) - МоÑква МоÑква, Ñтолица РоÑÑии, ÑвлÑетÑÑ Ð¾Ð´Ð½Ð¸Ð¼ из крупнейших городов мира. С наÑелением около 12,6 миллионов человек в городе и более 20 миллионов в агломерации, она центр политики, Ñкономики и культуры. Город разделён на 12 админиÑтративных округов, каждый Ñо Ñвоим характером. Одна из Ñамых иконичеÑких доÑтопримечательноÑтей — КраÑÐ½Ð°Ñ Ð¿Ð»Ð¾Ñ‰Ð°Ð´ÑŒ Ñ Ñобором ВаÑÐ¸Ð»Ð¸Ñ Ð‘Ð»Ð°Ð¶ÐµÐ½Ð½Ð¾Ð³Ð¾ и мавзолеем Ленина. ЗдеÑÑŒ проходÑÑ‚ парады и феÑтивали. 
Ещё один — Кремль, Ñ€ÐµÐ·Ð¸Ð´ÐµÐ½Ñ†Ð¸Ñ Ð¿Ñ€ÐµÐ·Ð¸Ð´ÐµÐ½Ñ‚Ð°, Ñ Ð¼ÑƒÐ·ÐµÑми и Ñоборами. Ðрбат — Ð¿ÐµÑˆÐµÑ…Ð¾Ð´Ð½Ð°Ñ ÑƒÐ»Ð¸Ñ†Ð° Ñ ÑƒÐ»Ð¸Ñ‡Ð½Ñ‹Ð¼Ð¸ артиÑтами и Ñувенирами. Парк Горького, 120 гектаров, идеален Ð´Ð»Ñ ÐºÐ°Ñ‚Ð°Ð½Ð¸Ñ Ð½Ð° коньках зимой и велоÑипедов летом. Метро МоÑквы извеÑтно ÑтанциÑми как дворцами, Ñ Ð¼Ð¾Ð·Ð°Ð¸ÐºÐ°Ð¼Ð¸ и люÑтрами. Ð’ Ñпорте ЦСКРи Спартак — вечные Ñоперники в футболе, играют на Ñтадионах вроде Лужников, где был ЧМ-2018. Хоккей Ñ Ð”Ð¸Ð½Ð°Ð¼Ð¾, баÑкетбол Ñ Ð¦Ð¡ÐšÐ. Ð¢ÐµÐ½Ð½Ð¸Ñ Ð² ОлимпийÑком. РуÑÑÐºÐ°Ñ ÐºÑƒÑ…Ð½Ñ: блины, борщ, пельмени, икра. Рынки как ДаниловÑкий предлагают Ñвежие продукты. Метро Ñ 15 линиÑми перевозит миллионы, Ñффективно неÑÐ¼Ð¾Ñ‚Ñ€Ñ Ð½Ð° пробки. ИнÑтитуты вроде МГУ привлекают Ñтудентов. ТверÑÐºÐ°Ñ â€” коммерчеÑÐºÐ°Ñ ÑƒÐ»Ð¸Ñ†Ð°. Музеи вроде ТретьÑковÑкой галереи показывают иÑкуÑÑтво. ÐÐ¾Ñ‡Ð½Ð°Ñ Ð¶Ð¸Ð·Ð½ÑŒ в клубах на КраÑном ОктÑбре. МоÑква ÑталкиваетÑÑ Ñ Ð¿Ñ€Ð¾Ð±ÐºÐ°Ð¼Ð¸ и зимним холодом, но ÑлектробуÑÑ‹ помогают. Парки вроде ВДÐÐ¥ Ð´Ð»Ñ Ð¾Ñ‚Ð´Ñ‹Ñ…Ð°. Большой театр принимает балеты. Ð’ итоге, МоÑква воплощает иÑторию и динамику. 
## Hindi (Devanagari Script) - मà¥à¤‚बई मà¥à¤‚बई, भारत की आरà¥à¤¥à¤¿à¤• राजधानी, दà¥à¤¨à¤¿à¤¯à¤¾ के सबसे बड़े शहरों में से à¤à¤• है। शहर की आबादी लगभग 1.2 करोड़ है और महानगरीय कà¥à¤·à¥‡à¤¤à¥à¤° में 2 करोड़ से अधिक, यह वितà¥à¤¤, फिलà¥à¤® और संसà¥à¤•ृति का केंदà¥à¤° है। शहर कई हिसà¥à¤¸à¥‹à¤‚ में बंटा है, हर à¤à¤• का अपना आकरà¥à¤·à¤£ है। सबसे पà¥à¤°à¤¸à¤¿à¤¦à¥à¤§ सà¥à¤¥à¤² गेटवे ऑफ इंडिया है, बà¥à¤°à¤¿à¤Ÿà¤¿à¤¶ काल का सà¥à¤®à¤¾à¤°à¤•, समà¥à¤¦à¥à¤° के किनारे सà¥à¤¥à¤¿à¤¤à¥¤ à¤à¤• और है छतà¥à¤°à¤ªà¤¤à¤¿ शिवाजी टरà¥à¤®à¤¿à¤¨à¤¸, यूनेसà¥à¤•ो सà¥à¤¥à¤², विकà¥à¤Ÿà¥‹à¤°à¤¿à¤¯à¤¨ गोथिक शैली में। मरीन डà¥à¤°à¤¾à¤‡à¤µ रानी का हार जैसा दिखता है, शाम की सैर के लिठआदरà¥à¤¶à¥¤ जà¥à¤¹à¥‚ बीच समà¥à¤¦à¥à¤° तट है, जहां लोग पिकनिक मनाते हैं। बॉलीवà¥à¤¡ फिलà¥à¤® सिटी में शूटिंग होती है। कोलाबा बाजार खरीदारी के लिà¤, गहने और कपड़े। खेल में मà¥à¤‚बई इंडियंस आईपीà¤à¤² कà¥à¤°à¤¿à¤•ेट टीम, वानखेड़े सà¥à¤Ÿà¥‡à¤¡à¤¿à¤¯à¤® में खेलती है, विशà¥à¤µ कप 2011 जीता। फà¥à¤Ÿà¤¬à¥‰à¤² मà¥à¤‚बई सिटी à¤à¤«à¤¸à¥€, हॉकी भी लोकपà¥à¤°à¤¿à¤¯à¥¤ भारतीय वà¥à¤¯à¤‚जन: वड़ा पाव, पाव भाजी, बिरयानी। बाजार जैसे कà¥à¤°à¥‰à¤«à¤°à¥à¤¡ ताजा सामगà¥à¤°à¥€à¥¤ मेटà¥à¤°à¥‹ और लोकल टà¥à¤°à¥‡à¤¨ लाखों को ले जाती, हालांकि भीड़भाड़। संसà¥à¤¥à¤¾à¤¨ जैसे आईआईटी छातà¥à¤°à¥‹à¤‚ को आकरà¥à¤·à¤¿à¤¤à¥¤ बांदà¥à¤°à¤¾ लगà¥à¤œà¤°à¥€ कà¥à¤·à¥‡à¤¤à¥à¤°à¥¤ संगà¥à¤°à¤¹à¤¾à¤²à¤¯ जैसे पà¥à¤°à¤¿à¤‚स ऑफ वेलà¥à¤¸ पà¥à¤°à¤¦à¤°à¥à¤¶à¤¨à¥€à¥¤ नाइटलाइफ़ जà¥à¤¹à¥‚ बार में। मà¥à¤‚बई टà¥à¤°à¥ˆà¤«à¤¿à¤• और मानसून की चà¥à¤¨à¥Œà¤¤à¤¿à¤¯à¥‹à¤‚ का सामना, लेकिन मोनोरेल मदद। पारà¥à¤• जैसे हैंगिंग गारà¥à¤¡à¤¨ आराम। ओपेरा हाउस शो। संकà¥à¤·à¥‡à¤ª में, मà¥à¤‚बई इतिहास और ऊरà¥à¤œà¤¾ का मिशà¥à¤°à¤£à¥¤ ## Thai (Thai Script) - à¸à¸£à¸¸à¸‡à¹€à¸—พมหานคร à¸à¸£à¸¸à¸‡à¹€à¸—พมหานคร หรือที่เรียà¸à¸ªà¸±à¹‰à¸™ ๆ ว่า à¸à¸£à¸¸à¸‡à¹€à¸—พฯ เป็นเมืองหลวงของประเทศไทย à¹à¸¥à¸°à¹€à¸›à¹‡à¸™à¸«à¸™à¸¶à¹ˆà¸‡à¹ƒà¸™à¹€à¸¡à¸·à¸­à¸‡à¸—ี่มีชีวิตชีวาที่สุดในเอเชียตะวันออà¸à¹€à¸‰à¸µà¸¢à¸‡à¹ƒà¸•้ ด้วยประชาà¸à¸£à¸›à¸£à¸°à¸¡à¸²à¸“ 8 ล้านคนในตัวเมือง à¹à¸¥à¸°à¸¡à¸²à¸à¸à¸§à¹ˆà¸² 14 ล้านคนในเขตปริมณฑล 
à¸à¸£à¸¸à¸‡à¹€à¸—พฯ เป็นศูนย์à¸à¸¥à¸²à¸‡à¸‚องà¸à¸²à¸£à¹€à¸¡à¸·à¸­à¸‡ เศรษà¸à¸à¸´à¸ˆ à¹à¸¥à¸°à¸§à¸±à¸’นธรรม เมืองนี้à¹à¸šà¹ˆà¸‡à¸­à¸­à¸à¹€à¸›à¹‡à¸™ 50 เขต โดยà¹à¸•่ละเขตมีเอà¸à¸¥à¸±à¸à¸©à¸“์เฉพาะตัว สถานที่สำคัà¸à¸—ี่โดดเด่นที่สุดคือวัดพระà¹à¸à¹‰à¸§ ซึ่งตั้งอยู่ในพระบรมมหาราชวัง เป็นที่ประดิษà¸à¸²à¸™à¸‚องพระà¹à¸à¹‰à¸§à¸¡à¸£à¸à¸• อันเป็นสัà¸à¸¥à¸±à¸à¸©à¸“์ทางศาสนาของชาติ วัดโพธิ์ ซึ่งอยู่ใà¸à¸¥à¹‰à¹€à¸„ียง เป็นที่รู้จัà¸à¹ƒà¸™à¸à¸²à¸™à¸°à¸§à¸±à¸”ที่มีพระพุทธไสยาสน์ขนาดใหà¸à¹ˆ à¹à¸¥à¸°à¹€à¸›à¹‡à¸™à¸¨à¸¹à¸™à¸¢à¹Œà¸à¸¥à¸²à¸‡à¸à¸²à¸£à¸™à¸§à¸”à¹à¸œà¸™à¹„ทย ตลาดน้ำดำเนินสะดวà¸à¹€à¸›à¹‡à¸™à¸ªà¸–านที่ท่องเที่ยวยอดนิยม ที่นัà¸à¸—่องเที่ยวสามารถนั่งเรือชมวิถีชีวิตริมน้ำ ถนนข้าวสารเป็นจุดหมายสำหรับนัà¸à¸—่องเที่ยวที่ต้องà¸à¸²à¸£à¸ªà¸±à¸¡à¸œà¸±à¸ªà¸šà¸£à¸£à¸¢à¸²à¸à¸²à¸¨à¸¢à¸²à¸¡à¸„่ำคืนà¹à¸¥à¸°à¸­à¸²à¸«à¸²à¸£à¸‚้างทาง à¹à¸¡à¹ˆà¸™à¹‰à¸³à¹€à¸ˆà¹‰à¸²à¸žà¸£à¸°à¸¢à¸²à¹„หลผ่านเมือง มีสะพานพระราม 8 ที่สวยงาม à¹à¸¥à¸°à¹€à¸«à¸¡à¸²à¸°à¸ªà¸³à¸«à¸£à¸±à¸šà¸à¸²à¸£à¸¥à¹ˆà¸­à¸‡à¹€à¸£à¸·à¸­à¸Šà¸¡à¸§à¸´à¸§ สวนลุมพินีเป็นสวนสาธารณะขนาด 57.6 เฮà¸à¸•าร์ เหมาะสำหรับà¸à¸²à¸£à¸§à¸´à¹ˆà¸‡à¸«à¸£à¸·à¸­à¸žà¸±à¸à¸œà¹ˆà¸­à¸™ ในด้านà¸à¸µà¸¬à¸² เมืองทองยูไนเต็ดเป็นทีมฟุตบอลชั้นนำในไทยลีภเล่นที่สนามเอสซีจี สเตเดี้ยม มวยไทยเป็นà¸à¸µà¸¬à¸²à¹à¸šà¸šà¸”ั้งเดิมที่ได้รับความนิยม สนามมวยลุมพินีà¹à¸¥à¸°à¸£à¸²à¸Šà¸”ำเนินเป็นสถานที่จัดà¹à¸‚่งขันสำคัภวอลเลย์บอลà¹à¸¥à¸°à¸•ะà¸à¸£à¹‰à¸­à¸à¹‡à¸¡à¸µà¹à¸Ÿà¸™à¸„ลับจำนวนมาภอาหารไทยมีชื่อเสียงระดับโลà¸: ต้มยำà¸à¸¸à¹‰à¸‡ ผัดไทย ส้มตำ à¹à¸¥à¸°à¸‚้าวเหนียวมะม่วง ตลาดนัดจตุจัà¸à¸£à¸¡à¸µà¸­à¸²à¸«à¸²à¸£à¹à¸¥à¸°à¸ªà¸´à¸™à¸„้าหลาà¸à¸«à¸¥à¸²à¸¢ รถไฟฟ้าบีทีเอสà¹à¸¥à¸°à¹€à¸­à¹‡à¸¡à¸­à¸²à¸£à¹Œà¸—ีขนส่งผู้คนนับล้าน à¹à¸¡à¹‰à¸ˆà¸°à¹à¸­à¸­à¸±à¸”ในชั่วโมงเร่งด่วน สถาบันเช่นจุฬาลงà¸à¸£à¸“์มหาวิทยาลัยดึงดูดนัà¸à¸¨à¸¶à¸à¸©à¸² สยามเป็นย่านช้อปปิ้งที่มีห้างสรรพสินค้าชั้นนำ พิพิธภัณฑ์สถานà¹à¸«à¹ˆà¸‡à¸Šà¸²à¸•ิà¹à¸ªà¸”งประวัติศาสตร์ ไนต์ไลฟ์ในสุขุมวิทมีบาร์à¹à¸¥à¸°à¸„ลับ à¸à¸£à¸¸à¸‡à¹€à¸—พฯ เผชิà¸à¸›à¸±à¸à¸«à¸²à¸£à¸–ติดà¹à¸¥à¸°à¸™à¹‰à¸³à¸—่วม à¹à¸•่รถไฟฟ้าสายใหม่ช่วยได้ 
สวนเบà¸à¸ˆà¸à¸´à¸•ิให้พื้นที่พัà¸à¸œà¹ˆà¸­à¸™ โรงละครà¹à¸«à¹ˆà¸‡à¸Šà¸²à¸•ิมีà¸à¸²à¸£à¹à¸ªà¸”ง à¸à¸£à¸¸à¸‡à¹€à¸—พฯ เป็นเมืองที่ผสมผสานวัฒนธรรมเà¸à¹ˆà¸²à¹à¸¥à¸°à¹ƒà¸«à¸¡à¹ˆ ## Bengali (Bengali Script) - কলকাতা কলকাতা, পশà§à¦šà¦¿à¦®à¦¬à¦™à§à¦—ের রাজধানী, ভারতের অনà§à¦¯à¦¤à¦® পà§à¦°à¦§à¦¾à¦¨ শহর। শহরের জনসংখà§à¦¯à¦¾ পà§à¦°à¦¾à¦¯à¦¼ ৪৫ লকà§à¦· à¦à¦¬à¦‚ মহানগর à¦à¦²à¦¾à¦•ায় দেড় কোটিরও বেশি, à¦à¦Ÿà¦¿ সংসà§à¦•ৃতি, শিলà§à¦ª ও শিকà§à¦·à¦¾à¦° কেনà§à¦¦à§à¦°à¥¤ শহরটি বিভিনà§à¦¨ ওয়ারà§à¦¡à§‡ বিভকà§à¦¤, পà§à¦°à¦¤à¦¿à¦Ÿà¦¿à¦° নিজসà§à¦¬ বৈশিষà§à¦Ÿà§à¦¯ রয়েছে। সবচেয়ে বিখà§à¦¯à¦¾à¦¤ লà§à¦¯à¦¾à¦¨à§à¦¡à¦®à¦¾à¦°à§à¦• হল ভিকà§à¦Ÿà§‹à¦°à¦¿à¦¯à¦¼à¦¾ মেমোরিয়াল, বà§à¦°à¦¿à¦Ÿà¦¿à¦¶ আমলের à¦à¦•টি সাদা মারà§à¦¬à§‡à¦² সà§à¦®à§ƒà¦¤à¦¿à¦¸à§à¦¤à¦®à§à¦­, যা যাদà§à¦˜à¦° হিসেবে কাজ করে। হাওড়া বà§à¦°à¦¿à¦œ, গঙà§à¦—া নদীর উপর নিরà§à¦®à¦¿à¦¤, শহরের পà§à¦°à¦¤à§€à¦• à¦à¦¬à¦‚ ভারতের বà§à¦¯à¦¸à§à¦¤à¦¤à¦® সেতà§à¦—à§à¦²à¦¿à¦° à¦à¦•টি। দকà§à¦·à¦¿à¦£à§‡à¦¶à§à¦¬à¦° মনà§à¦¦à¦¿à¦°, কালী মাতার উৎসরà§à¦—ীকৃত, ভকà§à¦¤à¦¦à§‡à¦° আকরà§à¦·à¦£ করে। ময়দান হল à¦à¦•টি বিশাল উদà§à¦¯à¦¾à¦¨, ৪০০ হেকà§à¦Ÿà¦°, ফà§à¦Ÿà¦¬à¦² à¦à¦¬à¦‚ ঘà§à¦¡à¦¼à¦¿ ওড়ানোর জনà§à¦¯ উপযà§à¦•à§à¦¤à¥¤ কà§à¦®à¦¾à¦°à¦Ÿà§à¦²à¦¿ হল মূরà§à¦¤à¦¿ তৈরির কেনà§à¦¦à§à¦°, বিশেষত দà§à¦°à§à¦—াপূজার জনà§à¦¯à¥¤ নিউ মারà§à¦•েটে কেনাকাটা, থেকে কাপড় থেকে মশলা। খেলাধà§à¦²à¦¾à¦¯à¦¼, ইসà§à¦Ÿ বেঙà§à¦—ল à¦à¦¬à¦‚ মোহনবাগান ফà§à¦Ÿà¦¬à¦²à§‡à¦° চিরপà§à¦°à¦¤à¦¿à¦¦à§à¦¬à¦¨à§à¦¦à§à¦¬à§€, ইডেন গারà§à¦¡à§‡à¦¨à§‡ খেলে, যা কà§à¦°à¦¿à¦•েটেরও কেনà§à¦¦à§à¦°à¥¤ কলকাতা নাইট রাইডারà§à¦¸ আইপিà¦à¦² দল। কাবাডি à¦à¦¬à¦‚ বà§à¦¯à¦¾à¦¡à¦®à¦¿à¦¨à§à¦Ÿà¦¨à¦“ জনপà§à¦°à¦¿à¦¯à¦¼à¥¤ বাঙালি খাবার: রসগোলà§à¦²à¦¾, সনà§à¦¦à§‡à¦¶, মাছের à¦à§‹à¦²à¥¤ পারà§à¦• সà§à¦Ÿà§à¦°à¦¿à¦Ÿà§‡ বিশà§à¦¬à§‡à¦° খাবার। মেটà§à¦°à§‹ রেল দিনে লকà§à¦·à¦¾à¦§à¦¿à¦• যাতà§à¦°à§€ বহন করে, যদিও ভিড় থাকে। পà§à¦°à¦¤à¦¿à¦·à§à¦ à¦¾à¦¨ যেমন কলকাতা বিশà§à¦¬à¦¬à¦¿à¦¦à§à¦¯à¦¾à¦²à¦¯à¦¼ ছাতà§à¦°à¦¦à§‡à¦° আকরà§à¦·à¦£ করে। সলà§à¦Ÿà¦²à§‡à¦• হল বà§à¦¯à¦¬à¦¸à¦¾à¦¯à¦¼à¦¿à¦• à¦à¦²à¦¾à¦•া। ভারতীয় জাদà§à¦˜à¦°à§‡ ইতিহাস পà§à¦°à¦¦à¦°à§à¦¶à¦¨à¥¤ রাতের 
জীবন রবীনà§à¦¦à§à¦°à¦¸à¦¦à¦¨à§‡ নাটক। কলকাতা টà§à¦°à¦¾à¦«à¦¿à¦• à¦à¦¬à¦‚ বরà§à¦·à¦¾à¦° সমসà§à¦¯à¦¾à¦° মà§à¦–োমà§à¦–ি, তবে টà§à¦°à¦¾à¦® সাহাযà§à¦¯ করে। পারà§à¦• যেমন ইকো পারà§à¦• বিশà§à¦°à¦¾à¦®à§‡à¦° জনà§à¦¯à¥¤ বিজà§à¦žà¦¾à¦¨ নগরী পà§à¦°à¦¦à¦°à§à¦¶à¦¨à§€à¥¤ সংকà§à¦·à§‡à¦ªà§‡, কলকাতা ইতিহাস ও জীবনীশকà§à¦¤à¦¿à¦° মিশà§à¦°à¦£à¥¤ ## Hebrew (Hebrew Script, RTL) - ×™×¨×•×©×œ×™× ×™×¨×•×©×œ×™×, בירת ישר×ל, ×”×™× ×חת ×”×¢×¨×™× ×”×¢×ª×™×§×•×ª והמשמעותיות בעול×. ×¢× ×וכלוסייה של ×›-950,000 ×ª×•×©×‘×™× ×‘×¢×™×¨ עצמה ויותר מ-1.2 מיליון ב×זור המטרופוליני, ×”×™× ×ž×¨×›×– דתי, תרבותי ופוליטי. העיר מחולקת לשכונות רבות, כל ×חת ×¢× ×ופי ייחודי משלה. ×חד מציוני הדרך ×”××™×™×§×•× ×™×™× ×‘×™×•×ª×¨ ×”×•× ×”×›×•×ª×œ המערבי, שריד ממקדש שלמה השני, ×ž×§×•× ×ª×¤×™×œ×” קדוש ליהודי×. העיר העתיקה, ×תר מורשת עולמית של ×ונסק"ו, כוללת ×ת כנסיית הקבר, הר הבית ומסגד ×ל-×קצ×. מגדל דוד, ליד שער יפו, משמש כמוזי×ון להיסטוריה של העיר. שוק מחנה יהודה ×”×•× ×ž×¨×›×– תוסס לקניות של תוצרת טרייה, ×ª×‘×œ×™× ×™× ×•×ž××›×œ×™× ×ž×§×•×ž×™×™×, והופך בלילה למרכז ×‘×™×œ×•×™×™× ×¢× ×‘×¨×™×. גן ס×קר ×”×•× ×¤×רק גדול המשמש לפיקניקי×, ספורט ו××™×¨×•×¢×™× ×¦×™×‘×•×¨×™×™×. רחוב בן יהודה ×”×•× ×זור להולכי רגל ×¢× ×—× ×•×™×•×ª ובתי קפה. בספורט, מכבי ×™×¨×•×©×œ×™× ×”×™× ×§×‘×•×¦×ª כדורסל מובילה, המשחקת ב×רנה של ירושלי×. קבוצות כדורגל כמו בית"ר ×™×¨×•×©×œ×™× ×ž×©×—×§×•×ª ב×צטדיון טדי, שמ×רח ×’× ×ª×—×¨×•×™×•×ª ×תלטיקה. ×˜×™×•×œ×™× ×¨×’×œ×™×™× ×•×¨×›×™×‘×” על ××•×¤× ×™×™× ×¤×•×¤×•×œ×¨×™×™× ×‘×”×¨×™ יהודה שמסביב. המטבח הישר×לי כולל חומוס, פל×פל, שקשוקה ושוו×רמה. ×©×•×•×§×™× ×›×ž×• שוק הכרמל ×ž×¦×™×¢×™× ×ž×¨×›×™×‘×™× ×˜×¨×™×™×. מערכת ×”××•×˜×•×‘×•×¡×™× ×•×”×¨×›×‘×ª הקלה משנעות מיליוני×, ×× ×›×™ ×œ×¢×™×ª×™× ×¢× ×¢×•×ž×¡ בשעות השי×. מוסדות כמו ×”×וניברסיטה העברית ×ž×•×©×›×™× ×¡×˜×•×“× ×˜×™× ×ž×›×œ העול×. שכונת רחביה משלבת היסטוריה ו×דריכלות מודרנית. מוזי××•× ×™× ×›×ž×• מוזי×ון ישר×ל ×ž×¦×™×’×™× ×ת מגילות ×™× ×”×ž×œ×—. ×—×™×™ הלילה במרכז העיר ×›×•×œ×œ×™× ×¤××‘×™× ×•×ž×•×¤×¢×™ מוזיקה ×—×™×”. 
×™×¨×•×©×œ×™× ×ž×ª×ž×•×“×“×ª ×¢× ××ª×’×¨×™× ×›×ž×• ×ž×ª×—×™× ×¤×•×œ×™×˜×™×™× ×•×¢×•×ž×¡ תיירותי, ×ך יוזמות כמו פ×רק המסילה משפרות ×ת המרחב הציבורי. ×’× ×™× ×›×ž×• גן ×”×•×•×¨×“×™× ×ž×¦×™×¢×™× ×ž× ×•×—×”. תי×טרון ×™×¨×•×©×œ×™× ×ž×רח הצגות. בקיצור, ×™×¨×•×©×œ×™× ×”×™× ×©×™×œ×•×‘ של קדושה וחיוניות. ## Greek (Greek Script) - Αθήνα Η Αθήνα, Ï€ÏωτεÏουσα της Ελλάδας, είναι μία από τις παλαιότεÏες πόλεις στον κόσμο, γνωστή ως λίκνο του Î´Ï…Ï„Î¹ÎºÎ¿Ï Ï€Î¿Î»Î¹Ï„Î¹ÏƒÎ¼Î¿Ï. Με πληθυσμό πεÏίπου 3,2 εκατομμÏÏια κατοίκους στην πόλη και πάνω από 4 εκατομμÏÏια στη μητÏοπολιτική πεÏιοχή, αποτελεί κέντÏο πολιτικής, οικονομίας και πολιτισμοÏ. Η πόλη χωÏίζεται σε διάφοÏες γειτονιές, καθεμία με τη δική της ταυτότητα. Ένα από τα πιο εμβληματικά αξιοθέατα είναι η ΑκÏόπολη, με τον ΠαÏθενώνα, έναν αÏχαίο ναό αφιεÏωμένο στη θεά Αθηνά, που Ï€ÏοσελκÏει εκατομμÏÏια επισκέπτες. Το Εθνικό ΑÏχαιολογικό Μουσείο φιλοξενεί ανεκτίμητα ευÏήματα από την αÏχαιότητα, όπως το άγαλμα του Ποσειδώνα. Η Πλάκα, μια γÏαφική συνοικία στους Ï€Ïόποδες της ΑκÏόπολης, είναι γεμάτη στενά δÏομάκια και παÏαδοσιακές ταβέÏνες. Η πλατεία Συντάγματος είναι η καÏδιά της πόλης, όπου βÏίσκεται το ΚοινοβοÏλιο και Ï€ÏαγματοποιοÏνται παÏελάσεις. Ο Λυκαβηττός, ένας λόφος στο κέντÏο, Ï€ÏοσφέÏει πανοÏαμική θέα, ιδανική για ηλιοβασιλέματα. Το ΜοναστηÏάκι είναι γνωστό για την αγοÏά του, με αντίκες και σουβενίÏ. Στον αθλητισμό, ο Παναθηναϊκός και ο Ολυμπιακός είναι οι μεγάλοι αντίπαλοι στο ποδόσφαιÏο, παίζοντας στο Ολυμπιακό Στάδιο, που φιλοξένησε τους ΟλυμπιακοÏÏ‚ Αγώνες του 2004. Το μπάσκετ είναι επίσης δημοφιλές με τον Παναθηναϊκό BC. Το τένις και η ιστιοπλοÎα Ï€ÏοσελκÏουν φαν στην ακτή. Η ελληνική κουζίνα πεÏιλαμβάνει σουβλάκι, μουσακά, τζατζίκι και φέτα. Οι αγοÏές όπως η ΚεντÏική ΑγοÏά ΒαÏβάκειος Ï€ÏοσφέÏουν φÏέσκα Ï€Ïοϊόντα. Το μετÏÏŒ της Αθήνας, με Ï„Ïεις γÏαμμές, είναι αποτελεσματικό, αν και πολυσÏχναστο τις ÏŽÏες αιχμής. ΙδÏÏματα όπως το Εθνικό και ΚαποδιστÏιακό Πανεπιστήμιο Ï€ÏοσελκÏουν φοιτητές. 
Η Κηφισιά είναι πολυτελής πεÏιοχή με μπουτίκ. Μουσεία όπως το Μουσείο Μπενάκη παÏουσιάζουν τέχνη. Η νυχτεÏινή ζωή στο Γκάζι πεÏιλαμβάνει κλαμπ και ζωντανή μουσική. Η Αθήνα αντιμετωπίζει Ï€Ïοκλήσεις όπως η κίνηση και η οικονομική κÏίση, αλλά έÏγα όπως το Ελληνικό βελτιώνουν την πόλη. ΠάÏκα όπως ο Εθνικός Κήπος Ï€ÏοσφέÏουν χαλάÏωση. Το Ωδείο ΗÏώδου Î‘Ï„Ï„Î¹ÎºÎ¿Ï Ï†Î¹Î»Î¿Î¾ÎµÎ½ÎµÎ¯ συναυλίες. Εν ολίγοις, η Αθήνα συνδυάζει ιστοÏία και ζωντάνια. ## Emojis and Symbols ### Common 😀 🎉 🌟 🚀 💡 🎨 🌈 🕠🎵 🆠### Modifiers Skin tone: 👋 👋🻠👋🼠👋🽠👋🾠👋🿠Gender: 🧑 👨 👩 🧑â€âš•ï¸ ðŸ‘¨â€âš•ï¸ ðŸ‘©â€âš•ï¸ Hair style: 🧑â€ðŸ¦° 👨â€ðŸ¦° 👩â€ðŸ¦° 🧑â€ðŸ¦± 👨â€ðŸ¦± 👩â€ðŸ¦± ### Sequences and Combinations Profession: 👨â€ðŸ’» 👩â€ðŸ’» 👨â€ðŸ”¬ 👩â€ðŸ”¬ 👨â€ðŸ« 👩â€ðŸ« Activity: 🧑â€ðŸ¤â€ðŸ§‘ 👨â€â¤ï¸â€ðŸ‘¨ 👩â€â¤ï¸â€ðŸ‘© 🧑â€â¤ï¸â€ðŸ’‹â€ðŸ§‘ ### Flags 🇺🇸 🇬🇧 🇫🇷 🇩🇪 🇯🇵 🇰🇷 🇨🇳 🇮🇳 🇧🇷 🇷🇺 Extended: ðŸ´ó §ó ¢ó ¥ó ®ó §ó ¿ ðŸ´ó §ó ¢ó ³ó £ó ´ó ¿ ðŸ´ó §ó ¢ó ·ó ¬ó ³ó ¿ ### Special Characters Zero-width: ​ †‌ ‌ ‌ Combining: aÌ€ aÌ aÌ‚ ã ä aÌŠ a̧ ą a̱ a̲ Variation: â™€ï¸ â™‚ï¸ â™Ÿï¸ â™ž ♠♜ â™› ♚ â™™ ♘ ### Mathematical and Technical Symbols ∑ ∠∫ ∬ ∭ ∮ ∯ ∰ ∇ ∆ ∂ √ ∞ ⌘ ⌥ ⇧ ⌃ ⎋ ⌫ ⌦ ⇥ ⇤ ⇧ ⇪ ### Currency and Financial Symbols $ € £ Â¥ ₹ ₽ â‚© ₪ ₨ ₦ ₱ â‚« â‚´ ₸ 💰 💵 💴 💶 💷 💸 💳 🦠📊 📈 📉 ### Food and Beverages 🎠ðŸ 🊠🋠🌠🉠🇠📠🫠🈠🥭 👠☕ 🵠🧃 🥤 🧋 🺠🷠🥂 🾠🥃 🧉 golang-github-clipperhouse-displaywidth-0.11.0+ds/testdata/test_cases.txt000066400000000000000000000041141515060771000266360ustar00rootroot00000000000000# Comprehensive Test Cases for String Width Comparison # This file contains various character types to test both our implementation and go-runewidth # Basic ASCII hello Hello World 1234567890 !@#$%^&*() # Latin Characters café naïve résumé Zürich São Paulo # East Asian Characters Chinese: 中文 Japanese: ã“ã‚“ã«ã¡ã¯ Korean: 안녕하세요 Mixed: Hello 世界 # Fullwidth Characters Fullwidth A: A Fullwidth 1: 1 Fullwidth !: ï¼ # Ambiguous Characters ★ ☆ â™  ♣ ♥ ♦ ° ± × ÷ ↠→ ↑ ↓ « » ‹ › # Emoji 😀 😠😂 🤣 😃 😄 😅 😆 😉 😊 🚀 🎉 🎊 🎈 🎠🎂 🎃 🎄 🎆 🎇 👨â€ðŸ‘©â€ðŸ‘§â€ðŸ‘¦ 👨â€ðŸ’» 👩â€ðŸ”¬ 👨â€ðŸŽ¨ 
👩â€ðŸš€ 🇺🇸 🇬🇧 🇫🇷 🇩🇪 🇯🇵 🇰🇷 🇨🇳 # Combining Marks e + combining acute: é a + combining grave: à o + combining tilde: õ n + combining tilde: ñ # Special Symbols Currency: $ € £ Â¥ ₹ ₽ Math: ∑ ∠∫ ∞ ≤ ≥ ≠ ≈ Arrows: ↠→ ↑ ↓ ↔ ↕ ⇠⇒ ⇑ ⇓ Punctuation: … — – " " ' ' # Mixed Complex Cases Hello 世界! 😀 Price: $100.00 €85.50 Math: ∑(x²) = ∞ Emoji sequence: 👨â€ðŸ’» working on 🚀 Mixed script: Hello 世界 안녕하세요 ã“ã‚“ã«ã¡ã¯ # Long strings for performance testing This is a very long string with many characters to test performance of both implementations. It contains various character types including ASCII, Unicode, emoji, and special symbols. The purpose is to see how both packages handle longer strings and whether there are any performance differences or edge cases that emerge with more complex input. # Stress test with many emoji 😀ðŸ˜ðŸ˜‚🤣😃😄😅😆😉😊😋😎ðŸ˜ðŸ˜˜ðŸ¥°ðŸ˜—😙😚☺ï¸ðŸ™‚🤗🤩🤔🤨ðŸ˜ðŸ˜‘😶🙄ðŸ˜ðŸ˜£ðŸ˜¥ðŸ˜®ðŸ¤ðŸ˜¯ðŸ˜ªðŸ˜«ðŸ¥±ðŸ˜´ðŸ˜ŒðŸ˜›ðŸ˜œðŸ˜ðŸ¤¤ðŸ˜’😓😔😕🙃🤑😲☹ï¸ðŸ™ðŸ˜–😞😟😤😢😭😦😧😨😩🤯😬😰😱🥵🥶😳🤪😵😡😠🤬😷🤒🤕🤢🤮🤧😇🤠🤡🥳🥴🥺🤥🤫🤭ðŸ§ðŸ¤“😈👿💀☠ï¸ðŸ‘¹ðŸ‘ºðŸ¤–👽👾💩😺😸😹😻😼😽🙀😿😾 golang-github-clipperhouse-displaywidth-0.11.0+ds/testdata/testdata.go000066400000000000000000000013111515060771000260740ustar00rootroot00000000000000package testdata import ( "crypto/rand" mathrand "math/rand" "os" "path/filepath" "runtime" ) func InvalidUTF8() ([]byte, error) { return load("UTF-8-test.txt") } func Sample() ([]byte, error) { return load("sample.txt") } func TestCases() ([]byte, error) { return load("test_cases.txt") } func RandomBytes() ([]byte, error) { length := mathrand.Intn(50) buf := make([]byte, length) _, err := rand.Read(buf) if err != nil { return nil, err } return buf, nil } func load(filename string) ([]byte, error) { // Get the directory of this source file _, currentFile, _, _ := runtime.Caller(0) dir := filepath.Dir(currentFile) path := filepath.Join(dir, filename) return os.ReadFile(path) } golang-github-clipperhouse-displaywidth-0.11.0+ds/trie.go000066400000000000000000003051361515060771000234310ustar00rootroot00000000000000// 
Code generated by internal/gen/main.go. DO NOT EDIT. package displaywidth // property is an enum representing the properties of a character type property uint8 const ( // Always 0 width, includes combining marks, control characters, non-printable, etc _Zero_Width property = iota + 1 // Always 2 wide (East Asian Wide F/W, Emoji, Regional Indicator) _Wide // Width depends on EastAsianWidth option _East_Asian_Ambiguous ) // lookup returns the trie value for the first UTF-8 encoding in s and // the width in bytes of this encoding. The size will be 0 if s does not // hold enough bytes to complete the encoding. len(s) must be greater than 0. func lookup[T ~string | []byte](s T) (v uint8, sz int) { c0 := s[0] switch { case c0 < 0x80: // is ASCII return stringWidthValues[c0], 1 case c0 < 0xC2: return 0, 1 // Illegal UTF-8: not a starter, not ASCII. case c0 < 0xE0: // 2-byte UTF-8 if len(s) < 2 { return 0, 0 } i := stringWidthIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } return lookupValue(uint32(i), c1), 2 case c0 < 0xF0: // 3-byte UTF-8 if len(s) < 3 { return 0, 0 } i := stringWidthIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = stringWidthIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } return lookupValue(uint32(i), c2), 3 case c0 < 0xF8: // 4-byte UTF-8 if len(s) < 4 { return 0, 0 } i := stringWidthIndex[c0] c1 := s[1] if c1 < 0x80 || 0xC0 <= c1 { return 0, 1 // Illegal UTF-8: not a continuation byte. } o := uint32(i)<<6 + uint32(c1) i = stringWidthIndex[o] c2 := s[2] if c2 < 0x80 || 0xC0 <= c2 { return 0, 2 // Illegal UTF-8: not a continuation byte. } o = uint32(i)<<6 + uint32(c2) i = stringWidthIndex[o] c3 := s[3] if c3 < 0x80 || 0xC0 <= c3 { return 0, 3 // Illegal UTF-8: not a continuation byte. 
} return lookupValue(uint32(i), c3), 4 } // Illegal rune return 0, 1 } // stringWidthTrie. Total size: 17664 bytes (17.25 KiB). Checksum: 220983462f26d765. // type stringWidthTrie struct { } // func newStringWidthTrie(i int) *stringWidthTrie { // return &stringWidthTrie{} // } // lookupValue determines the type of block n and looks up the value for b. func lookupValue(n uint32, b byte) uint8 { switch { default: return uint8(stringWidthValues[n<<6+uint32(b)]) } } // stringWidthValues: 246 blocks, 15744 entries, 15744 bytes // The third block is the zero block. var stringWidthValues = [15744]uint8{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xc0: 0x0001, 0xc1: 0x0001, 0xc2: 0x0001, 0xc3: 0x0001, 0xc4: 0x0001, 0xc5: 0x0001, 0xc6: 0x0001, 0xc7: 0x0001, 0xc8: 0x0001, 0xc9: 0x0001, 0xca: 0x0001, 0xcb: 0x0001, 0xcc: 0x0001, 0xcd: 0x0001, 0xce: 0x0001, 0xcf: 0x0001, 0xd0: 0x0001, 0xd1: 0x0001, 0xd2: 0x0001, 0xd3: 0x0001, 0xd4: 0x0001, 0xd5: 0x0001, 0xd6: 0x0001, 0xd7: 0x0001, 0xd8: 0x0001, 0xd9: 0x0001, 0xda: 0x0001, 0xdb: 0x0001, 0xdc: 0x0001, 0xdd: 0x0001, 0xde: 0x0001, 0xdf: 0x0001, 0xe1: 0x0003, 0xe4: 0x0003, 0xe7: 0x0003, 0xe8: 0x0003, 0xea: 0x0003, 0xed: 0x0001, 0xee: 0x0003, 0xf0: 0x0003, 0xf1: 0x0003, 0xf2: 0x0003, 0xf3: 0x0003, 0xf4: 0x0003, 0xf6: 0x0003, 0xf7: 0x0003, 0xf8: 0x0003, 0xf9: 0x0003, 0xfa: 0x0003, 0xfc: 0x0003, 0xfd: 0x0003, 0xfe: 0x0003, 0xff: 0x0003, // Block 0x4, offset 0x100 0x106: 0x0003, 0x110: 0x0003, 0x117: 0x0003, 0x118: 0x0003, 0x11e: 0x0003, 0x11f: 0x0003, 0x120: 0x0003, 0x121: 0x0003, 0x126: 0x0003, 0x128: 0x0003, 0x129: 0x0003, 0x12a: 0x0003, 0x12c: 0x0003, 0x12d: 0x0003, 0x130: 0x0003, 0x132: 0x0003, 0x133: 0x0003, 0x137: 0x0003, 0x138: 0x0003, 0x139: 0x0003, 0x13a: 0x0003, 0x13c: 0x0003, 0x13e: 0x0003, // Block 0x5, offset 0x140 0x141: 0x0003, 0x151: 0x0003, 0x153: 0x0003, 0x15b: 0x0003, 0x166: 0x0003, 0x167: 0x0003, 0x16b: 0x0003, 0x171: 0x0003, 0x172: 0x0003, 0x173: 0x0003, 
0x178: 0x0003, 0x17f: 0x0003, // Block 0x6, offset 0x180 0x180: 0x0003, 0x181: 0x0003, 0x182: 0x0003, 0x184: 0x0003, 0x188: 0x0003, 0x189: 0x0003, 0x18a: 0x0003, 0x18b: 0x0003, 0x18d: 0x0003, 0x192: 0x0003, 0x193: 0x0003, 0x1a6: 0x0003, 0x1a7: 0x0003, 0x1ab: 0x0003, // Block 0x7, offset 0x1c0 0x1ce: 0x0003, 0x1d0: 0x0003, 0x1d2: 0x0003, 0x1d4: 0x0003, 0x1d6: 0x0003, 0x1d8: 0x0003, 0x1da: 0x0003, 0x1dc: 0x0003, // Block 0x8, offset 0x200 0x211: 0x0003, 0x221: 0x0003, // Block 0x9, offset 0x240 0x244: 0x0003, 0x247: 0x0003, 0x249: 0x0003, 0x24a: 0x0003, 0x24b: 0x0003, 0x24d: 0x0003, 0x250: 0x0003, 0x258: 0x0003, 0x259: 0x0003, 0x25a: 0x0003, 0x25b: 0x0003, 0x25d: 0x0003, 0x25f: 0x0003, // Block 0xa, offset 0x280 0x280: 0x0001, 0x281: 0x0001, 0x282: 0x0001, 0x283: 0x0001, 0x284: 0x0001, 0x285: 0x0001, 0x286: 0x0001, 0x287: 0x0001, 0x288: 0x0001, 0x289: 0x0001, 0x28a: 0x0001, 0x28b: 0x0001, 0x28c: 0x0001, 0x28d: 0x0001, 0x28e: 0x0001, 0x28f: 0x0001, 0x290: 0x0001, 0x291: 0x0001, 0x292: 0x0001, 0x293: 0x0001, 0x294: 0x0001, 0x295: 0x0001, 0x296: 0x0001, 0x297: 0x0001, 0x298: 0x0001, 0x299: 0x0001, 0x29a: 0x0001, 0x29b: 0x0001, 0x29c: 0x0001, 0x29d: 0x0001, 0x29e: 0x0001, 0x29f: 0x0001, 0x2a0: 0x0001, 0x2a1: 0x0001, 0x2a2: 0x0001, 0x2a3: 0x0001, 0x2a4: 0x0001, 0x2a5: 0x0001, 0x2a6: 0x0001, 0x2a7: 0x0001, 0x2a8: 0x0001, 0x2a9: 0x0001, 0x2aa: 0x0001, 0x2ab: 0x0001, 0x2ac: 0x0001, 0x2ad: 0x0001, 0x2ae: 0x0001, 0x2af: 0x0001, 0x2b0: 0x0001, 0x2b1: 0x0001, 0x2b2: 0x0001, 0x2b3: 0x0001, 0x2b4: 0x0001, 0x2b5: 0x0001, 0x2b6: 0x0001, 0x2b7: 0x0001, 0x2b8: 0x0001, 0x2b9: 0x0001, 0x2ba: 0x0001, 0x2bb: 0x0001, 0x2bc: 0x0001, 0x2bd: 0x0001, 0x2be: 0x0001, 0x2bf: 0x0001, // Block 0xb, offset 0x2c0 0x2c0: 0x0001, 0x2c1: 0x0001, 0x2c2: 0x0001, 0x2c3: 0x0001, 0x2c4: 0x0001, 0x2c5: 0x0001, 0x2c6: 0x0001, 0x2c7: 0x0001, 0x2c8: 0x0001, 0x2c9: 0x0001, 0x2ca: 0x0001, 0x2cb: 0x0001, 0x2cc: 0x0001, 0x2cd: 0x0001, 0x2ce: 0x0001, 0x2cf: 0x0001, 0x2d0: 0x0001, 0x2d1: 0x0001, 0x2d2: 0x0001, 0x2d3: 
0x0001, 0x2d4: 0x0001, 0x2d5: 0x0001, 0x2d6: 0x0001, 0x2d7: 0x0001, 0x2d8: 0x0001, 0x2d9: 0x0001, 0x2da: 0x0001, 0x2db: 0x0001, 0x2dc: 0x0001, 0x2dd: 0x0001, 0x2de: 0x0001, 0x2df: 0x0001, 0x2e0: 0x0001, 0x2e1: 0x0001, 0x2e2: 0x0001, 0x2e3: 0x0001, 0x2e4: 0x0001, 0x2e5: 0x0001, 0x2e6: 0x0001, 0x2e7: 0x0001, 0x2e8: 0x0001, 0x2e9: 0x0001, 0x2ea: 0x0001, 0x2eb: 0x0001, 0x2ec: 0x0001, 0x2ed: 0x0001, 0x2ee: 0x0001, 0x2ef: 0x0001, // Block 0xc, offset 0x300 0x311: 0x0003, 0x312: 0x0003, 0x313: 0x0003, 0x314: 0x0003, 0x315: 0x0003, 0x316: 0x0003, 0x317: 0x0003, 0x318: 0x0003, 0x319: 0x0003, 0x31a: 0x0003, 0x31b: 0x0003, 0x31c: 0x0003, 0x31d: 0x0003, 0x31e: 0x0003, 0x31f: 0x0003, 0x320: 0x0003, 0x321: 0x0003, 0x323: 0x0003, 0x324: 0x0003, 0x325: 0x0003, 0x326: 0x0003, 0x327: 0x0003, 0x328: 0x0003, 0x329: 0x0003, 0x331: 0x0003, 0x332: 0x0003, 0x333: 0x0003, 0x334: 0x0003, 0x335: 0x0003, 0x336: 0x0003, 0x337: 0x0003, 0x338: 0x0003, 0x339: 0x0003, 0x33a: 0x0003, 0x33b: 0x0003, 0x33c: 0x0003, 0x33d: 0x0003, 0x33e: 0x0003, 0x33f: 0x0003, // Block 0xd, offset 0x340 0x340: 0x0003, 0x341: 0x0003, 0x343: 0x0003, 0x344: 0x0003, 0x345: 0x0003, 0x346: 0x0003, 0x347: 0x0003, 0x348: 0x0003, 0x349: 0x0003, // Block 0xe, offset 0x380 0x381: 0x0003, 0x390: 0x0003, 0x391: 0x0003, 0x392: 0x0003, 0x393: 0x0003, 0x394: 0x0003, 0x395: 0x0003, 0x396: 0x0003, 0x397: 0x0003, 0x398: 0x0003, 0x399: 0x0003, 0x39a: 0x0003, 0x39b: 0x0003, 0x39c: 0x0003, 0x39d: 0x0003, 0x39e: 0x0003, 0x39f: 0x0003, 0x3a0: 0x0003, 0x3a1: 0x0003, 0x3a2: 0x0003, 0x3a3: 0x0003, 0x3a4: 0x0003, 0x3a5: 0x0003, 0x3a6: 0x0003, 0x3a7: 0x0003, 0x3a8: 0x0003, 0x3a9: 0x0003, 0x3aa: 0x0003, 0x3ab: 0x0003, 0x3ac: 0x0003, 0x3ad: 0x0003, 0x3ae: 0x0003, 0x3af: 0x0003, 0x3b0: 0x0003, 0x3b1: 0x0003, 0x3b2: 0x0003, 0x3b3: 0x0003, 0x3b4: 0x0003, 0x3b5: 0x0003, 0x3b6: 0x0003, 0x3b7: 0x0003, 0x3b8: 0x0003, 0x3b9: 0x0003, 0x3ba: 0x0003, 0x3bb: 0x0003, 0x3bc: 0x0003, 0x3bd: 0x0003, 0x3be: 0x0003, 0x3bf: 0x0003, // Block 0xf, offset 0x3c0 0x3c0: 
0x0003, 0x3c1: 0x0003, 0x3c2: 0x0003, 0x3c3: 0x0003, 0x3c4: 0x0003, 0x3c5: 0x0003, 0x3c6: 0x0003, 0x3c7: 0x0003, 0x3c8: 0x0003, 0x3c9: 0x0003, 0x3ca: 0x0003, 0x3cb: 0x0003, 0x3cc: 0x0003, 0x3cd: 0x0003, 0x3ce: 0x0003, 0x3cf: 0x0003, 0x3d1: 0x0003, // Block 0x10, offset 0x400 0x403: 0x0001, 0x404: 0x0001, 0x405: 0x0001, 0x406: 0x0001, 0x407: 0x0001, 0x408: 0x0001, 0x409: 0x0001, // Block 0x11, offset 0x440 0x451: 0x0001, 0x452: 0x0001, 0x453: 0x0001, 0x454: 0x0001, 0x455: 0x0001, 0x456: 0x0001, 0x457: 0x0001, 0x458: 0x0001, 0x459: 0x0001, 0x45a: 0x0001, 0x45b: 0x0001, 0x45c: 0x0001, 0x45d: 0x0001, 0x45e: 0x0001, 0x45f: 0x0001, 0x460: 0x0001, 0x461: 0x0001, 0x462: 0x0001, 0x463: 0x0001, 0x464: 0x0001, 0x465: 0x0001, 0x466: 0x0001, 0x467: 0x0001, 0x468: 0x0001, 0x469: 0x0001, 0x46a: 0x0001, 0x46b: 0x0001, 0x46c: 0x0001, 0x46d: 0x0001, 0x46e: 0x0001, 0x46f: 0x0001, 0x470: 0x0001, 0x471: 0x0001, 0x472: 0x0001, 0x473: 0x0001, 0x474: 0x0001, 0x475: 0x0001, 0x476: 0x0001, 0x477: 0x0001, 0x478: 0x0001, 0x479: 0x0001, 0x47a: 0x0001, 0x47b: 0x0001, 0x47c: 0x0001, 0x47d: 0x0001, 0x47f: 0x0001, // Block 0x12, offset 0x480 0x481: 0x0001, 0x482: 0x0001, 0x484: 0x0001, 0x485: 0x0001, 0x487: 0x0001, // Block 0x13, offset 0x4c0 0x4c0: 0x0001, 0x4c1: 0x0001, 0x4c2: 0x0001, 0x4c3: 0x0001, 0x4c4: 0x0001, 0x4c5: 0x0001, 0x4d0: 0x0001, 0x4d1: 0x0001, 0x4d2: 0x0001, 0x4d3: 0x0001, 0x4d4: 0x0001, 0x4d5: 0x0001, 0x4d6: 0x0001, 0x4d7: 0x0001, 0x4d8: 0x0001, 0x4d9: 0x0001, 0x4da: 0x0001, 0x4dc: 0x0001, // Block 0x14, offset 0x500 0x50b: 0x0001, 0x50c: 0x0001, 0x50d: 0x0001, 0x50e: 0x0001, 0x50f: 0x0001, 0x510: 0x0001, 0x511: 0x0001, 0x512: 0x0001, 0x513: 0x0001, 0x514: 0x0001, 0x515: 0x0001, 0x516: 0x0001, 0x517: 0x0001, 0x518: 0x0001, 0x519: 0x0001, 0x51a: 0x0001, 0x51b: 0x0001, 0x51c: 0x0001, 0x51d: 0x0001, 0x51e: 0x0001, 0x51f: 0x0001, 0x530: 0x0001, // Block 0x15, offset 0x540 0x556: 0x0001, 0x557: 0x0001, 0x558: 0x0001, 0x559: 0x0001, 0x55a: 0x0001, 0x55b: 0x0001, 0x55c: 0x0001, 0x55d: 
0x0001, 0x55f: 0x0001, 0x560: 0x0001, 0x561: 0x0001, 0x562: 0x0001, 0x563: 0x0001, 0x564: 0x0001, 0x567: 0x0001, 0x568: 0x0001, 0x56a: 0x0001, 0x56b: 0x0001, 0x56c: 0x0001, 0x56d: 0x0001, // Block 0x16, offset 0x580 0x58f: 0x0001, 0x591: 0x0001, 0x5b0: 0x0001, 0x5b1: 0x0001, 0x5b2: 0x0001, 0x5b3: 0x0001, 0x5b4: 0x0001, 0x5b5: 0x0001, 0x5b6: 0x0001, 0x5b7: 0x0001, 0x5b8: 0x0001, 0x5b9: 0x0001, 0x5ba: 0x0001, 0x5bb: 0x0001, 0x5bc: 0x0001, 0x5bd: 0x0001, 0x5be: 0x0001, 0x5bf: 0x0001, // Block 0x17, offset 0x5c0 0x5c0: 0x0001, 0x5c1: 0x0001, 0x5c2: 0x0001, 0x5c3: 0x0001, 0x5c4: 0x0001, 0x5c5: 0x0001, 0x5c6: 0x0001, 0x5c7: 0x0001, 0x5c8: 0x0001, 0x5c9: 0x0001, 0x5ca: 0x0001, // Block 0x18, offset 0x600 0x626: 0x0001, 0x627: 0x0001, 0x628: 0x0001, 0x629: 0x0001, 0x62a: 0x0001, 0x62b: 0x0001, 0x62c: 0x0001, 0x62d: 0x0001, 0x62e: 0x0001, 0x62f: 0x0001, 0x630: 0x0001, // Block 0x19, offset 0x640 0x66b: 0x0001, 0x66c: 0x0001, 0x66d: 0x0001, 0x66e: 0x0001, 0x66f: 0x0001, 0x670: 0x0001, 0x671: 0x0001, 0x672: 0x0001, 0x673: 0x0001, 0x67d: 0x0001, // Block 0x1a, offset 0x680 0x696: 0x0001, 0x697: 0x0001, 0x698: 0x0001, 0x699: 0x0001, 0x69b: 0x0001, 0x69c: 0x0001, 0x69d: 0x0001, 0x69e: 0x0001, 0x69f: 0x0001, 0x6a0: 0x0001, 0x6a1: 0x0001, 0x6a2: 0x0001, 0x6a3: 0x0001, 0x6a5: 0x0001, 0x6a6: 0x0001, 0x6a7: 0x0001, 0x6a9: 0x0001, 0x6aa: 0x0001, 0x6ab: 0x0001, 0x6ac: 0x0001, 0x6ad: 0x0001, // Block 0x1b, offset 0x6c0 0x6d9: 0x0001, 0x6da: 0x0001, 0x6db: 0x0001, // Block 0x1c, offset 0x700 0x710: 0x0001, 0x711: 0x0001, 0x718: 0x0001, 0x719: 0x0001, 0x71a: 0x0001, 0x71b: 0x0001, 0x71c: 0x0001, 0x71d: 0x0001, 0x71e: 0x0001, 0x71f: 0x0001, // Block 0x1d, offset 0x740 0x74a: 0x0001, 0x74b: 0x0001, 0x74c: 0x0001, 0x74d: 0x0001, 0x74e: 0x0001, 0x74f: 0x0001, 0x750: 0x0001, 0x751: 0x0001, 0x752: 0x0001, 0x753: 0x0001, 0x754: 0x0001, 0x755: 0x0001, 0x756: 0x0001, 0x757: 0x0001, 0x758: 0x0001, 0x759: 0x0001, 0x75a: 0x0001, 0x75b: 0x0001, 0x75c: 0x0001, 0x75d: 0x0001, 0x75e: 0x0001, 0x75f: 
0x0001, 0x760: 0x0001, 0x761: 0x0001, 0x762: 0x0001, 0x763: 0x0001, 0x764: 0x0001, 0x765: 0x0001, 0x766: 0x0001, 0x767: 0x0001, 0x768: 0x0001, 0x769: 0x0001, 0x76a: 0x0001, 0x76b: 0x0001, 0x76c: 0x0001, 0x76d: 0x0001, 0x76e: 0x0001, 0x76f: 0x0001, 0x770: 0x0001, 0x771: 0x0001, 0x772: 0x0001, 0x773: 0x0001, 0x774: 0x0001, 0x775: 0x0001, 0x776: 0x0001, 0x777: 0x0001, 0x778: 0x0001, 0x779: 0x0001, 0x77a: 0x0001, 0x77b: 0x0001, 0x77c: 0x0001, 0x77d: 0x0001, 0x77e: 0x0001, 0x77f: 0x0001, // Block 0x1e, offset 0x780 0x780: 0x0001, 0x781: 0x0001, 0x782: 0x0001, 0x7ba: 0x0001, 0x7bc: 0x0001, // Block 0x1f, offset 0x7c0 0x7c1: 0x0001, 0x7c2: 0x0001, 0x7c3: 0x0001, 0x7c4: 0x0001, 0x7c5: 0x0001, 0x7c6: 0x0001, 0x7c7: 0x0001, 0x7c8: 0x0001, 0x7cd: 0x0001, 0x7d1: 0x0001, 0x7d2: 0x0001, 0x7d3: 0x0001, 0x7d4: 0x0001, 0x7d5: 0x0001, 0x7d6: 0x0001, 0x7d7: 0x0001, 0x7e2: 0x0001, 0x7e3: 0x0001, // Block 0x20, offset 0x800 0x801: 0x0001, 0x83c: 0x0001, // Block 0x21, offset 0x840 0x841: 0x0001, 0x842: 0x0001, 0x843: 0x0001, 0x844: 0x0001, 0x84d: 0x0001, 0x862: 0x0001, 0x863: 0x0001, 0x87e: 0x0001, // Block 0x22, offset 0x880 0x881: 0x0001, 0x882: 0x0001, 0x8bc: 0x0001, // Block 0x23, offset 0x8c0 0x8c1: 0x0001, 0x8c2: 0x0001, 0x8c7: 0x0001, 0x8c8: 0x0001, 0x8cb: 0x0001, 0x8cc: 0x0001, 0x8cd: 0x0001, 0x8d1: 0x0001, 0x8f0: 0x0001, 0x8f1: 0x0001, 0x8f5: 0x0001, // Block 0x24, offset 0x900 0x901: 0x0001, 0x902: 0x0001, 0x903: 0x0001, 0x904: 0x0001, 0x905: 0x0001, 0x907: 0x0001, 0x908: 0x0001, 0x90d: 0x0001, 0x922: 0x0001, 0x923: 0x0001, 0x93a: 0x0001, 0x93b: 0x0001, 0x93c: 0x0001, 0x93d: 0x0001, 0x93e: 0x0001, 0x93f: 0x0001, // Block 0x25, offset 0x940 0x941: 0x0001, 0x97c: 0x0001, 0x97f: 0x0001, // Block 0x26, offset 0x980 0x981: 0x0001, 0x982: 0x0001, 0x983: 0x0001, 0x984: 0x0001, 0x98d: 0x0001, 0x995: 0x0001, 0x996: 0x0001, 0x9a2: 0x0001, 0x9a3: 0x0001, // Block 0x27, offset 0x9c0 0x9c2: 0x0001, // Block 0x28, offset 0xa00 0xa00: 0x0001, 0xa0d: 0x0001, // Block 0x29, offset 0xa40 
0xa40: 0x0001, 0xa44: 0x0001, 0xa7c: 0x0001, 0xa7e: 0x0001, 0xa7f: 0x0001, // Block 0x2a, offset 0xa80 0xa80: 0x0001, 0xa86: 0x0001, 0xa87: 0x0001, 0xa88: 0x0001, 0xa8a: 0x0001, 0xa8b: 0x0001, 0xa8c: 0x0001, 0xa8d: 0x0001, 0xa95: 0x0001, 0xa96: 0x0001, 0xaa2: 0x0001, 0xaa3: 0x0001, // Block 0x2b, offset 0xac0 0xac6: 0x0001, 0xacc: 0x0001, 0xacd: 0x0001, 0xae2: 0x0001, 0xae3: 0x0001, // Block 0x2c, offset 0xb00 0xb00: 0x0001, 0xb01: 0x0001, 0xb3b: 0x0001, 0xb3c: 0x0001, // Block 0x2d, offset 0xb40 0xb41: 0x0001, 0xb42: 0x0001, 0xb43: 0x0001, 0xb44: 0x0001, 0xb4d: 0x0001, 0xb62: 0x0001, 0xb63: 0x0001, // Block 0x2e, offset 0xb80 0xb81: 0x0001, // Block 0x2f, offset 0xbc0 0xbca: 0x0001, 0xbd2: 0x0001, 0xbd3: 0x0001, 0xbd4: 0x0001, 0xbd6: 0x0001, // Block 0x30, offset 0xc00 0xc31: 0x0001, 0xc34: 0x0001, 0xc35: 0x0001, 0xc36: 0x0001, 0xc37: 0x0001, 0xc38: 0x0001, 0xc39: 0x0001, 0xc3a: 0x0001, // Block 0x31, offset 0xc40 0xc47: 0x0001, 0xc48: 0x0001, 0xc49: 0x0001, 0xc4a: 0x0001, 0xc4b: 0x0001, 0xc4c: 0x0001, 0xc4d: 0x0001, 0xc4e: 0x0001, // Block 0x32, offset 0xc80 0xcb1: 0x0001, 0xcb4: 0x0001, 0xcb5: 0x0001, 0xcb6: 0x0001, 0xcb7: 0x0001, 0xcb8: 0x0001, 0xcb9: 0x0001, 0xcba: 0x0001, 0xcbb: 0x0001, 0xcbc: 0x0001, // Block 0x33, offset 0xcc0 0xcc8: 0x0001, 0xcc9: 0x0001, 0xcca: 0x0001, 0xccb: 0x0001, 0xccc: 0x0001, 0xccd: 0x0001, 0xcce: 0x0001, // Block 0x34, offset 0xd00 0xd18: 0x0001, 0xd19: 0x0001, 0xd35: 0x0001, 0xd37: 0x0001, 0xd39: 0x0001, // Block 0x35, offset 0xd40 0xd71: 0x0001, 0xd72: 0x0001, 0xd73: 0x0001, 0xd74: 0x0001, 0xd75: 0x0001, 0xd76: 0x0001, 0xd77: 0x0001, 0xd78: 0x0001, 0xd79: 0x0001, 0xd7a: 0x0001, 0xd7b: 0x0001, 0xd7c: 0x0001, 0xd7d: 0x0001, 0xd7e: 0x0001, // Block 0x36, offset 0xd80 0xd80: 0x0001, 0xd81: 0x0001, 0xd82: 0x0001, 0xd83: 0x0001, 0xd84: 0x0001, 0xd86: 0x0001, 0xd87: 0x0001, 0xd8d: 0x0001, 0xd8e: 0x0001, 0xd8f: 0x0001, 0xd90: 0x0001, 0xd91: 0x0001, 0xd92: 0x0001, 0xd93: 0x0001, 0xd94: 0x0001, 0xd95: 0x0001, 0xd96: 0x0001, 0xd97: 0x0001, 
0xd99: 0x0001, 0xd9a: 0x0001, 0xd9b: 0x0001, 0xd9c: 0x0001, 0xd9d: 0x0001, 0xd9e: 0x0001, 0xd9f: 0x0001, 0xda0: 0x0001, 0xda1: 0x0001, 0xda2: 0x0001, 0xda3: 0x0001, 0xda4: 0x0001, 0xda5: 0x0001, 0xda6: 0x0001, 0xda7: 0x0001, 0xda8: 0x0001, 0xda9: 0x0001, 0xdaa: 0x0001, 0xdab: 0x0001, 0xdac: 0x0001, 0xdad: 0x0001, 0xdae: 0x0001, 0xdaf: 0x0001, 0xdb0: 0x0001, 0xdb1: 0x0001, 0xdb2: 0x0001, 0xdb3: 0x0001, 0xdb4: 0x0001, 0xdb5: 0x0001, 0xdb6: 0x0001, 0xdb7: 0x0001, 0xdb8: 0x0001, 0xdb9: 0x0001, 0xdba: 0x0001, 0xdbb: 0x0001, 0xdbc: 0x0001, // Block 0x37, offset 0xdc0 0xdc6: 0x0001, // Block 0x38, offset 0xe00 0xe2d: 0x0001, 0xe2e: 0x0001, 0xe2f: 0x0001, 0xe30: 0x0001, 0xe32: 0x0001, 0xe33: 0x0001, 0xe34: 0x0001, 0xe35: 0x0001, 0xe36: 0x0001, 0xe37: 0x0001, 0xe39: 0x0001, 0xe3a: 0x0001, 0xe3d: 0x0001, 0xe3e: 0x0001, // Block 0x39, offset 0xe40 0xe58: 0x0001, 0xe59: 0x0001, 0xe5e: 0x0001, 0xe5f: 0x0001, 0xe60: 0x0001, 0xe71: 0x0001, 0xe72: 0x0001, 0xe73: 0x0001, 0xe74: 0x0001, // Block 0x3a, offset 0xe80 0xe82: 0x0001, 0xe85: 0x0001, 0xe86: 0x0001, 0xe8d: 0x0001, 0xe9d: 0x0001, // Block 0x3b, offset 0xec0 0xec0: 0x0002, 0xec1: 0x0002, 0xec2: 0x0002, 0xec3: 0x0002, 0xec4: 0x0002, 0xec5: 0x0002, 0xec6: 0x0002, 0xec7: 0x0002, 0xec8: 0x0002, 0xec9: 0x0002, 0xeca: 0x0002, 0xecb: 0x0002, 0xecc: 0x0002, 0xecd: 0x0002, 0xece: 0x0002, 0xecf: 0x0002, 0xed0: 0x0002, 0xed1: 0x0002, 0xed2: 0x0002, 0xed3: 0x0002, 0xed4: 0x0002, 0xed5: 0x0002, 0xed6: 0x0002, 0xed7: 0x0002, 0xed8: 0x0002, 0xed9: 0x0002, 0xeda: 0x0002, 0xedb: 0x0002, 0xedc: 0x0002, 0xedd: 0x0002, 0xede: 0x0002, 0xedf: 0x0002, 0xee0: 0x0002, 0xee1: 0x0002, 0xee2: 0x0002, 0xee3: 0x0002, 0xee4: 0x0002, 0xee5: 0x0002, 0xee6: 0x0002, 0xee7: 0x0002, 0xee8: 0x0002, 0xee9: 0x0002, 0xeea: 0x0002, 0xeeb: 0x0002, 0xeec: 0x0002, 0xeed: 0x0002, 0xeee: 0x0002, 0xeef: 0x0002, 0xef0: 0x0002, 0xef1: 0x0002, 0xef2: 0x0002, 0xef3: 0x0002, 0xef4: 0x0002, 0xef5: 0x0002, 0xef6: 0x0002, 0xef7: 0x0002, 0xef8: 0x0002, 0xef9: 0x0002, 0xefa: 0x0002, 
0xefb: 0x0002, 0xefc: 0x0002, 0xefd: 0x0002, 0xefe: 0x0002, 0xeff: 0x0002, // Block 0x3c, offset 0xf00 0xf00: 0x0002, 0xf01: 0x0002, 0xf02: 0x0002, 0xf03: 0x0002, 0xf04: 0x0002, 0xf05: 0x0002, 0xf06: 0x0002, 0xf07: 0x0002, 0xf08: 0x0002, 0xf09: 0x0002, 0xf0a: 0x0002, 0xf0b: 0x0002, 0xf0c: 0x0002, 0xf0d: 0x0002, 0xf0e: 0x0002, 0xf0f: 0x0002, 0xf10: 0x0002, 0xf11: 0x0002, 0xf12: 0x0002, 0xf13: 0x0002, 0xf14: 0x0002, 0xf15: 0x0002, 0xf16: 0x0002, 0xf17: 0x0002, 0xf18: 0x0002, 0xf19: 0x0002, 0xf1a: 0x0002, 0xf1b: 0x0002, 0xf1c: 0x0002, 0xf1d: 0x0002, 0xf1e: 0x0002, 0xf1f: 0x0002, // Block 0x3d, offset 0xf40 0xf5d: 0x0001, 0xf5e: 0x0001, 0xf5f: 0x0001, // Block 0x3e, offset 0xf80 0xf92: 0x0001, 0xf93: 0x0001, 0xf94: 0x0001, 0xfb2: 0x0001, 0xfb3: 0x0001, // Block 0x3f, offset 0xfc0 0xfd2: 0x0001, 0xfd3: 0x0001, 0xff2: 0x0001, 0xff3: 0x0001, // Block 0x40, offset 0x1000 0x1034: 0x0001, 0x1035: 0x0001, 0x1037: 0x0001, 0x1038: 0x0001, 0x1039: 0x0001, 0x103a: 0x0001, 0x103b: 0x0001, 0x103c: 0x0001, 0x103d: 0x0001, // Block 0x41, offset 0x1040 0x1046: 0x0001, 0x1049: 0x0001, 0x104a: 0x0001, 0x104b: 0x0001, 0x104c: 0x0001, 0x104d: 0x0001, 0x104e: 0x0001, 0x104f: 0x0001, 0x1050: 0x0001, 0x1051: 0x0001, 0x1052: 0x0001, 0x1053: 0x0001, 0x105d: 0x0001, // Block 0x42, offset 0x1080 0x108b: 0x0001, 0x108c: 0x0001, 0x108d: 0x0001, 0x108e: 0x0001, 0x108f: 0x0001, // Block 0x43, offset 0x10c0 0x10c5: 0x0001, 0x10c6: 0x0001, 0x10e9: 0x0001, // Block 0x44, offset 0x1100 0x1120: 0x0001, 0x1121: 0x0001, 0x1122: 0x0001, 0x1127: 0x0001, 0x1128: 0x0001, 0x1132: 0x0001, 0x1139: 0x0001, 0x113a: 0x0001, 0x113b: 0x0001, // Block 0x45, offset 0x1140 0x1157: 0x0001, 0x1158: 0x0001, 0x115b: 0x0001, // Block 0x46, offset 0x1180 0x1196: 0x0001, 0x1198: 0x0001, 0x1199: 0x0001, 0x119a: 0x0001, 0x119b: 0x0001, 0x119c: 0x0001, 0x119d: 0x0001, 0x119e: 0x0001, 0x11a0: 0x0001, 0x11a2: 0x0001, 0x11a5: 0x0001, 0x11a6: 0x0001, 0x11a7: 0x0001, 0x11a8: 0x0001, 0x11a9: 0x0001, 0x11aa: 0x0001, 0x11ab: 0x0001, 
0x11ac: 0x0001, 0x11b3: 0x0001, 0x11b4: 0x0001, 0x11b5: 0x0001, 0x11b6: 0x0001, 0x11b7: 0x0001, 0x11b8: 0x0001, 0x11b9: 0x0001, 0x11ba: 0x0001, 0x11bb: 0x0001, 0x11bc: 0x0001, 0x11bf: 0x0001, // Block 0x47, offset 0x11c0 0x11f0: 0x0001, 0x11f1: 0x0001, 0x11f2: 0x0001, 0x11f3: 0x0001, 0x11f4: 0x0001, 0x11f5: 0x0001, 0x11f6: 0x0001, 0x11f7: 0x0001, 0x11f8: 0x0001, 0x11f9: 0x0001, 0x11fa: 0x0001, 0x11fb: 0x0001, 0x11fc: 0x0001, 0x11fd: 0x0001, 0x11fe: 0x0001, 0x11ff: 0x0001, // Block 0x48, offset 0x1200 0x1200: 0x0001, 0x1201: 0x0001, 0x1202: 0x0001, 0x1203: 0x0001, 0x1204: 0x0001, 0x1205: 0x0001, 0x1206: 0x0001, 0x1207: 0x0001, 0x1208: 0x0001, 0x1209: 0x0001, 0x120a: 0x0001, 0x120b: 0x0001, 0x120c: 0x0001, 0x120d: 0x0001, 0x120e: 0x0001, // Block 0x49, offset 0x1240 0x1240: 0x0001, 0x1241: 0x0001, 0x1242: 0x0001, 0x1243: 0x0001, 0x1274: 0x0001, 0x1276: 0x0001, 0x1277: 0x0001, 0x1278: 0x0001, 0x1279: 0x0001, 0x127a: 0x0001, 0x127c: 0x0001, // Block 0x4a, offset 0x1280 0x1282: 0x0001, 0x12ab: 0x0001, 0x12ac: 0x0001, 0x12ad: 0x0001, 0x12ae: 0x0001, 0x12af: 0x0001, 0x12b0: 0x0001, 0x12b1: 0x0001, 0x12b2: 0x0001, 0x12b3: 0x0001, // Block 0x4b, offset 0x12c0 0x12c0: 0x0001, 0x12c1: 0x0001, 0x12e2: 0x0001, 0x12e3: 0x0001, 0x12e4: 0x0001, 0x12e5: 0x0001, 0x12e8: 0x0001, 0x12e9: 0x0001, 0x12eb: 0x0001, 0x12ec: 0x0001, 0x12ed: 0x0001, // Block 0x4c, offset 0x1300 0x1326: 0x0001, 0x1328: 0x0001, 0x1329: 0x0001, 0x132d: 0x0001, 0x132f: 0x0001, 0x1330: 0x0001, 0x1331: 0x0001, // Block 0x4d, offset 0x1340 0x136c: 0x0001, 0x136d: 0x0001, 0x136e: 0x0001, 0x136f: 0x0001, 0x1370: 0x0001, 0x1371: 0x0001, 0x1372: 0x0001, 0x1373: 0x0001, 0x1376: 0x0001, 0x1377: 0x0001, // Block 0x4e, offset 0x1380 0x1390: 0x0001, 0x1391: 0x0001, 0x1392: 0x0001, 0x1394: 0x0001, 0x1395: 0x0001, 0x1396: 0x0001, 0x1397: 0x0001, 0x1398: 0x0001, 0x1399: 0x0001, 0x139a: 0x0001, 0x139b: 0x0001, 0x139c: 0x0001, 0x139d: 0x0001, 0x139e: 0x0001, 0x139f: 0x0001, 0x13a0: 0x0001, 0x13a2: 0x0001, 0x13a3: 0x0001, 0x13a4: 
0x0001, 0x13a5: 0x0001, 0x13a6: 0x0001, 0x13a7: 0x0001, 0x13a8: 0x0001, 0x13ad: 0x0001, 0x13b4: 0x0001, 0x13b8: 0x0001, 0x13b9: 0x0001, // Block 0x4f, offset 0x13c0 0x13cb: 0x0001, 0x13cc: 0x0001, 0x13cd: 0x0001, 0x13ce: 0x0001, 0x13cf: 0x0001, 0x13d0: 0x0003, 0x13d3: 0x0003, 0x13d4: 0x0003, 0x13d5: 0x0003, 0x13d6: 0x0003, 0x13d8: 0x0003, 0x13d9: 0x0003, 0x13dc: 0x0003, 0x13dd: 0x0003, 0x13e0: 0x0003, 0x13e1: 0x0003, 0x13e2: 0x0003, 0x13e4: 0x0003, 0x13e5: 0x0003, 0x13e6: 0x0003, 0x13e7: 0x0003, 0x13e8: 0x0001, 0x13e9: 0x0001, 0x13ea: 0x0001, 0x13eb: 0x0001, 0x13ec: 0x0001, 0x13ed: 0x0001, 0x13ee: 0x0001, 0x13f0: 0x0003, 0x13f2: 0x0003, 0x13f3: 0x0003, 0x13f5: 0x0003, 0x13fb: 0x0003, 0x13fe: 0x0003, // Block 0x50, offset 0x1400 0x1420: 0x0001, 0x1421: 0x0001, 0x1422: 0x0001, 0x1423: 0x0001, 0x1424: 0x0001, 0x1426: 0x0001, 0x1427: 0x0001, 0x1428: 0x0001, 0x1429: 0x0001, 0x142a: 0x0001, 0x142b: 0x0001, 0x142c: 0x0001, 0x142d: 0x0001, 0x142e: 0x0001, 0x142f: 0x0001, 0x1434: 0x0003, 0x143f: 0x0003, // Block 0x51, offset 0x1440 0x1441: 0x0003, 0x1442: 0x0003, 0x1443: 0x0003, 0x1444: 0x0003, 0x146c: 0x0003, // Block 0x52, offset 0x1480 0x1490: 0x0001, 0x1491: 0x0001, 0x1492: 0x0001, 0x1493: 0x0001, 0x1494: 0x0001, 0x1495: 0x0001, 0x1496: 0x0001, 0x1497: 0x0001, 0x1498: 0x0001, 0x1499: 0x0001, 0x149a: 0x0001, 0x149b: 0x0001, 0x149c: 0x0001, 0x149d: 0x0001, 0x149e: 0x0001, 0x149f: 0x0001, 0x14a0: 0x0001, 0x14a1: 0x0001, 0x14a2: 0x0001, 0x14a3: 0x0001, 0x14a4: 0x0001, 0x14a5: 0x0001, 0x14a6: 0x0001, 0x14a7: 0x0001, 0x14a8: 0x0001, 0x14a9: 0x0001, 0x14aa: 0x0001, 0x14ab: 0x0001, 0x14ac: 0x0001, 0x14ad: 0x0001, 0x14ae: 0x0001, 0x14af: 0x0001, 0x14b0: 0x0001, // Block 0x53, offset 0x14c0 0x14c3: 0x0003, 0x14c5: 0x0003, 0x14c9: 0x0003, 0x14d3: 0x0003, 0x14d6: 0x0003, 0x14e1: 0x0003, 0x14e2: 0x0003, 0x14e6: 0x0003, 0x14eb: 0x0003, // Block 0x54, offset 0x1500 0x1513: 0x0003, 0x1514: 0x0003, 0x151b: 0x0003, 0x151c: 0x0003, 0x151d: 0x0003, 0x151e: 0x0003, 0x1520: 0x0003, 0x1521: 
0x0003, 0x1522: 0x0003, 0x1523: 0x0003, 0x1524: 0x0003, 0x1525: 0x0003, 0x1526: 0x0003, 0x1527: 0x0003, 0x1528: 0x0003, 0x1529: 0x0003, 0x152a: 0x0003, 0x152b: 0x0003, 0x1530: 0x0003, 0x1531: 0x0003, 0x1532: 0x0003, 0x1533: 0x0003, 0x1534: 0x0003, 0x1535: 0x0003, 0x1536: 0x0003, 0x1537: 0x0003, 0x1538: 0x0003, 0x1539: 0x0003, // Block 0x55, offset 0x1540 0x1549: 0x0003, 0x1550: 0x0003, 0x1551: 0x0003, 0x1552: 0x0003, 0x1553: 0x0003, 0x1554: 0x0003, 0x1555: 0x0003, 0x1556: 0x0003, 0x1557: 0x0003, 0x1558: 0x0003, 0x1559: 0x0003, 0x1578: 0x0003, 0x1579: 0x0003, // Block 0x56, offset 0x1580 0x1592: 0x0003, 0x1594: 0x0003, 0x15a7: 0x0003, // Block 0x57, offset 0x15c0 0x15c0: 0x0003, 0x15c2: 0x0003, 0x15c3: 0x0003, 0x15c7: 0x0003, 0x15c8: 0x0003, 0x15cb: 0x0003, 0x15cf: 0x0003, 0x15d1: 0x0003, 0x15d5: 0x0003, 0x15da: 0x0003, 0x15dd: 0x0003, 0x15de: 0x0003, 0x15df: 0x0003, 0x15e0: 0x0003, 0x15e3: 0x0003, 0x15e5: 0x0003, 0x15e7: 0x0003, 0x15e8: 0x0003, 0x15e9: 0x0003, 0x15ea: 0x0003, 0x15eb: 0x0003, 0x15ec: 0x0003, 0x15ee: 0x0003, 0x15f4: 0x0003, 0x15f5: 0x0003, 0x15f6: 0x0003, 0x15f7: 0x0003, 0x15fc: 0x0003, 0x15fd: 0x0003, // Block 0x58, offset 0x1600 0x1608: 0x0003, 0x160c: 0x0003, 0x1612: 0x0003, 0x1620: 0x0003, 0x1621: 0x0003, 0x1624: 0x0003, 0x1625: 0x0003, 0x1626: 0x0003, 0x1627: 0x0003, 0x162a: 0x0003, 0x162b: 0x0003, 0x162e: 0x0003, 0x162f: 0x0003, // Block 0x59, offset 0x1640 0x1642: 0x0003, 0x1643: 0x0003, 0x1646: 0x0003, 0x1647: 0x0003, 0x1655: 0x0003, 0x1659: 0x0003, 0x1665: 0x0003, 0x167f: 0x0003, // Block 0x5a, offset 0x1680 0x1692: 0x0003, 0x169a: 0x0002, 0x169b: 0x0002, 0x16a9: 0x0002, 0x16aa: 0x0002, // Block 0x5b, offset 0x16c0 0x16e9: 0x0002, 0x16ea: 0x0002, 0x16eb: 0x0002, 0x16ec: 0x0002, 0x16f0: 0x0002, 0x16f3: 0x0002, // Block 0x5c, offset 0x1700 0x1720: 0x0003, 0x1721: 0x0003, 0x1722: 0x0003, 0x1723: 0x0003, 0x1724: 0x0003, 0x1725: 0x0003, 0x1726: 0x0003, 0x1727: 0x0003, 0x1728: 0x0003, 0x1729: 0x0003, 0x172a: 0x0003, 0x172b: 0x0003, 0x172c: 0x0003, 
0x172d: 0x0003, 0x172e: 0x0003, 0x172f: 0x0003, 0x1730: 0x0003, 0x1731: 0x0003, 0x1732: 0x0003, 0x1733: 0x0003, 0x1734: 0x0003, 0x1735: 0x0003, 0x1736: 0x0003, 0x1737: 0x0003, 0x1738: 0x0003, 0x1739: 0x0003, 0x173a: 0x0003, 0x173b: 0x0003, 0x173c: 0x0003, 0x173d: 0x0003, 0x173e: 0x0003, 0x173f: 0x0003, // Block 0x5d, offset 0x1740 0x1740: 0x0003, 0x1741: 0x0003, 0x1742: 0x0003, 0x1743: 0x0003, 0x1744: 0x0003, 0x1745: 0x0003, 0x1746: 0x0003, 0x1747: 0x0003, 0x1748: 0x0003, 0x1749: 0x0003, 0x174a: 0x0003, 0x174b: 0x0003, 0x174c: 0x0003, 0x174d: 0x0003, 0x174e: 0x0003, 0x174f: 0x0003, 0x1750: 0x0003, 0x1751: 0x0003, 0x1752: 0x0003, 0x1753: 0x0003, 0x1754: 0x0003, 0x1755: 0x0003, 0x1756: 0x0003, 0x1757: 0x0003, 0x1758: 0x0003, 0x1759: 0x0003, 0x175a: 0x0003, 0x175b: 0x0003, 0x175c: 0x0003, 0x175d: 0x0003, 0x175e: 0x0003, 0x175f: 0x0003, 0x1760: 0x0003, 0x1761: 0x0003, 0x1762: 0x0003, 0x1763: 0x0003, 0x1764: 0x0003, 0x1765: 0x0003, 0x1766: 0x0003, 0x1767: 0x0003, 0x1768: 0x0003, 0x1769: 0x0003, 0x176a: 0x0003, 0x176b: 0x0003, 0x176c: 0x0003, 0x176d: 0x0003, 0x176e: 0x0003, 0x176f: 0x0003, 0x1770: 0x0003, 0x1771: 0x0003, 0x1772: 0x0003, 0x1773: 0x0003, 0x1774: 0x0003, 0x1775: 0x0003, 0x1776: 0x0003, 0x1777: 0x0003, 0x1778: 0x0003, 0x1779: 0x0003, 0x177a: 0x0003, 0x177b: 0x0003, 0x177c: 0x0003, 0x177d: 0x0003, 0x177e: 0x0003, 0x177f: 0x0003, // Block 0x5e, offset 0x1780 0x1780: 0x0003, 0x1781: 0x0003, 0x1782: 0x0003, 0x1783: 0x0003, 0x1784: 0x0003, 0x1785: 0x0003, 0x1786: 0x0003, 0x1787: 0x0003, 0x1788: 0x0003, 0x1789: 0x0003, 0x178a: 0x0003, 0x178b: 0x0003, 0x178c: 0x0003, 0x178d: 0x0003, 0x178e: 0x0003, 0x178f: 0x0003, 0x1790: 0x0003, 0x1791: 0x0003, 0x1792: 0x0003, 0x1793: 0x0003, 0x1794: 0x0003, 0x1795: 0x0003, 0x1796: 0x0003, 0x1797: 0x0003, 0x1798: 0x0003, 0x1799: 0x0003, 0x179a: 0x0003, 0x179b: 0x0003, 0x179c: 0x0003, 0x179d: 0x0003, 0x179e: 0x0003, 0x179f: 0x0003, 0x17a0: 0x0003, 0x17a1: 0x0003, 0x17a2: 0x0003, 0x17a3: 0x0003, 0x17a4: 0x0003, 0x17a5: 0x0003, 
0x17a6: 0x0003, 0x17a7: 0x0003, 0x17a8: 0x0003, 0x17a9: 0x0003, 0x17ab: 0x0003, 0x17ac: 0x0003, 0x17ad: 0x0003, 0x17ae: 0x0003, 0x17af: 0x0003, 0x17b0: 0x0003, 0x17b1: 0x0003, 0x17b2: 0x0003, 0x17b3: 0x0003, 0x17b4: 0x0003, 0x17b5: 0x0003, 0x17b6: 0x0003, 0x17b7: 0x0003, 0x17b8: 0x0003, 0x17b9: 0x0003, 0x17ba: 0x0003, 0x17bb: 0x0003, 0x17bc: 0x0003, 0x17bd: 0x0003, 0x17be: 0x0003, 0x17bf: 0x0003, // Block 0x5f, offset 0x17c0 0x17c0: 0x0003, 0x17c1: 0x0003, 0x17c2: 0x0003, 0x17c3: 0x0003, 0x17c4: 0x0003, 0x17c5: 0x0003, 0x17c6: 0x0003, 0x17c7: 0x0003, 0x17c8: 0x0003, 0x17c9: 0x0003, 0x17ca: 0x0003, 0x17cb: 0x0003, 0x17d0: 0x0003, 0x17d1: 0x0003, 0x17d2: 0x0003, 0x17d3: 0x0003, 0x17d4: 0x0003, 0x17d5: 0x0003, 0x17d6: 0x0003, 0x17d7: 0x0003, 0x17d8: 0x0003, 0x17d9: 0x0003, 0x17da: 0x0003, 0x17db: 0x0003, 0x17dc: 0x0003, 0x17dd: 0x0003, 0x17de: 0x0003, 0x17df: 0x0003, 0x17e0: 0x0003, 0x17e1: 0x0003, 0x17e2: 0x0003, 0x17e3: 0x0003, 0x17e4: 0x0003, 0x17e5: 0x0003, 0x17e6: 0x0003, 0x17e7: 0x0003, 0x17e8: 0x0003, 0x17e9: 0x0003, 0x17ea: 0x0003, 0x17eb: 0x0003, 0x17ec: 0x0003, 0x17ed: 0x0003, 0x17ee: 0x0003, 0x17ef: 0x0003, 0x17f0: 0x0003, 0x17f1: 0x0003, 0x17f2: 0x0003, 0x17f3: 0x0003, // Block 0x60, offset 0x1800 0x1800: 0x0003, 0x1801: 0x0003, 0x1802: 0x0003, 0x1803: 0x0003, 0x1804: 0x0003, 0x1805: 0x0003, 0x1806: 0x0003, 0x1807: 0x0003, 0x1808: 0x0003, 0x1809: 0x0003, 0x180a: 0x0003, 0x180b: 0x0003, 0x180c: 0x0003, 0x180d: 0x0003, 0x180e: 0x0003, 0x180f: 0x0003, 0x1812: 0x0003, 0x1813: 0x0003, 0x1814: 0x0003, 0x1815: 0x0003, 0x1820: 0x0003, 0x1821: 0x0003, 0x1823: 0x0003, 0x1824: 0x0003, 0x1825: 0x0003, 0x1826: 0x0003, 0x1827: 0x0003, 0x1828: 0x0003, 0x1829: 0x0003, 0x1832: 0x0003, 0x1833: 0x0003, 0x1836: 0x0003, 0x1837: 0x0003, 0x183c: 0x0003, 0x183d: 0x0003, // Block 0x61, offset 0x1840 0x1840: 0x0003, 0x1841: 0x0003, 0x1846: 0x0003, 0x1847: 0x0003, 0x1848: 0x0003, 0x184b: 0x0003, 0x184e: 0x0003, 0x184f: 0x0003, 0x1850: 0x0003, 0x1851: 0x0003, 0x1862: 0x0003, 0x1863: 
0x0003, 0x1864: 0x0003, 0x1865: 0x0003, 0x186f: 0x0003, 0x187d: 0x0002, 0x187e: 0x0002, // Block 0x62, offset 0x1880 0x1885: 0x0003, 0x1886: 0x0003, 0x1889: 0x0003, 0x188e: 0x0003, 0x188f: 0x0003, 0x1894: 0x0002, 0x1895: 0x0002, 0x189c: 0x0003, 0x189e: 0x0003, 0x18b0: 0x0002, 0x18b1: 0x0002, 0x18b2: 0x0002, 0x18b3: 0x0002, 0x18b4: 0x0002, 0x18b5: 0x0002, 0x18b6: 0x0002, 0x18b7: 0x0002, // Block 0x63, offset 0x18c0 0x18c0: 0x0003, 0x18c2: 0x0003, 0x18c8: 0x0002, 0x18c9: 0x0002, 0x18ca: 0x0002, 0x18cb: 0x0002, 0x18cc: 0x0002, 0x18cd: 0x0002, 0x18ce: 0x0002, 0x18cf: 0x0002, 0x18d0: 0x0002, 0x18d1: 0x0002, 0x18d2: 0x0002, 0x18d3: 0x0002, 0x18e0: 0x0003, 0x18e1: 0x0003, 0x18e3: 0x0003, 0x18e4: 0x0003, 0x18e5: 0x0003, 0x18e7: 0x0003, 0x18e8: 0x0003, 0x18e9: 0x0003, 0x18ea: 0x0003, 0x18ec: 0x0003, 0x18ed: 0x0003, 0x18ef: 0x0003, 0x18ff: 0x0002, // Block 0x64, offset 0x1900 0x190a: 0x0002, 0x190b: 0x0002, 0x190c: 0x0002, 0x190d: 0x0002, 0x190e: 0x0002, 0x190f: 0x0002, 0x1913: 0x0002, 0x191e: 0x0003, 0x191f: 0x0003, 0x1921: 0x0002, 0x192a: 0x0002, 0x192b: 0x0002, 0x193d: 0x0002, 0x193e: 0x0002, 0x193f: 0x0003, // Block 0x65, offset 0x1940 0x1944: 0x0002, 0x1945: 0x0002, 0x1946: 0x0003, 0x1947: 0x0003, 0x1948: 0x0003, 0x1949: 0x0003, 0x194a: 0x0003, 0x194b: 0x0003, 0x194c: 0x0003, 0x194d: 0x0003, 0x194e: 0x0002, 0x194f: 0x0003, 0x1950: 0x0003, 0x1951: 0x0003, 0x1952: 0x0003, 0x1953: 0x0003, 0x1954: 0x0002, 0x1955: 0x0003, 0x1956: 0x0003, 0x1957: 0x0003, 0x1958: 0x0003, 0x1959: 0x0003, 0x195a: 0x0003, 0x195b: 0x0003, 0x195c: 0x0003, 0x195d: 0x0003, 0x195e: 0x0003, 0x195f: 0x0003, 0x1960: 0x0003, 0x1961: 0x0003, 0x1963: 0x0003, 0x1968: 0x0003, 0x1969: 0x0003, 0x196a: 0x0002, 0x196b: 0x0003, 0x196c: 0x0003, 0x196d: 0x0003, 0x196e: 0x0003, 0x196f: 0x0003, 0x1970: 0x0003, 0x1971: 0x0003, 0x1972: 0x0002, 0x1973: 0x0002, 0x1974: 0x0003, 0x1975: 0x0002, 0x1976: 0x0003, 0x1977: 0x0003, 0x1978: 0x0003, 0x1979: 0x0003, 0x197a: 0x0002, 0x197b: 0x0003, 0x197c: 0x0003, 0x197d: 0x0002, 
0x197e: 0x0003, 0x197f: 0x0003, // Block 0x66, offset 0x1980 0x1985: 0x0002, 0x198a: 0x0002, 0x198b: 0x0002, 0x19a8: 0x0002, 0x19bd: 0x0003, // Block 0x67, offset 0x19c0 0x19cc: 0x0002, 0x19ce: 0x0002, 0x19d3: 0x0002, 0x19d4: 0x0002, 0x19d5: 0x0002, 0x19d7: 0x0002, 0x19f6: 0x0003, 0x19f7: 0x0003, 0x19f8: 0x0003, 0x19f9: 0x0003, 0x19fa: 0x0003, 0x19fb: 0x0003, 0x19fc: 0x0003, 0x19fd: 0x0003, 0x19fe: 0x0003, 0x19ff: 0x0003, // Block 0x68, offset 0x1a00 0x1a15: 0x0002, 0x1a16: 0x0002, 0x1a17: 0x0002, 0x1a30: 0x0002, 0x1a3f: 0x0002, // Block 0x69, offset 0x1a40 0x1a5b: 0x0002, 0x1a5c: 0x0002, // Block 0x6a, offset 0x1a80 0x1a90: 0x0002, 0x1a95: 0x0002, 0x1a96: 0x0003, 0x1a97: 0x0003, 0x1a98: 0x0003, 0x1a99: 0x0003, // Block 0x6b, offset 0x1ac0 0x1aef: 0x0001, 0x1af0: 0x0001, 0x1af1: 0x0001, // Block 0x6c, offset 0x1b00 0x1b3f: 0x0001, // Block 0x6d, offset 0x1b40 0x1b60: 0x0001, 0x1b61: 0x0001, 0x1b62: 0x0001, 0x1b63: 0x0001, 0x1b64: 0x0001, 0x1b65: 0x0001, 0x1b66: 0x0001, 0x1b67: 0x0001, 0x1b68: 0x0001, 0x1b69: 0x0001, 0x1b6a: 0x0001, 0x1b6b: 0x0001, 0x1b6c: 0x0001, 0x1b6d: 0x0001, 0x1b6e: 0x0001, 0x1b6f: 0x0001, 0x1b70: 0x0001, 0x1b71: 0x0001, 0x1b72: 0x0001, 0x1b73: 0x0001, 0x1b74: 0x0001, 0x1b75: 0x0001, 0x1b76: 0x0001, 0x1b77: 0x0001, 0x1b78: 0x0001, 0x1b79: 0x0001, 0x1b7a: 0x0001, 0x1b7b: 0x0001, 0x1b7c: 0x0001, 0x1b7d: 0x0001, 0x1b7e: 0x0001, 0x1b7f: 0x0001, // Block 0x6e, offset 0x1b80 0x1b80: 0x0002, 0x1b81: 0x0002, 0x1b82: 0x0002, 0x1b83: 0x0002, 0x1b84: 0x0002, 0x1b85: 0x0002, 0x1b86: 0x0002, 0x1b87: 0x0002, 0x1b88: 0x0002, 0x1b89: 0x0002, 0x1b8a: 0x0002, 0x1b8b: 0x0002, 0x1b8c: 0x0002, 0x1b8d: 0x0002, 0x1b8e: 0x0002, 0x1b8f: 0x0002, 0x1b90: 0x0002, 0x1b91: 0x0002, 0x1b92: 0x0002, 0x1b93: 0x0002, 0x1b94: 0x0002, 0x1b95: 0x0002, 0x1b96: 0x0002, 0x1b97: 0x0002, 0x1b98: 0x0002, 0x1b99: 0x0002, 0x1b9b: 0x0002, 0x1b9c: 0x0002, 0x1b9d: 0x0002, 0x1b9e: 0x0002, 0x1b9f: 0x0002, 0x1ba0: 0x0002, 0x1ba1: 0x0002, 0x1ba2: 0x0002, 0x1ba3: 0x0002, 0x1ba4: 0x0002, 0x1ba5: 
0x0002, 0x1ba6: 0x0002, 0x1ba7: 0x0002, 0x1ba8: 0x0002, 0x1ba9: 0x0002, 0x1baa: 0x0002, 0x1bab: 0x0002, 0x1bac: 0x0002, 0x1bad: 0x0002, 0x1bae: 0x0002, 0x1baf: 0x0002, 0x1bb0: 0x0002, 0x1bb1: 0x0002, 0x1bb2: 0x0002, 0x1bb3: 0x0002, 0x1bb4: 0x0002, 0x1bb5: 0x0002, 0x1bb6: 0x0002, 0x1bb7: 0x0002, 0x1bb8: 0x0002, 0x1bb9: 0x0002, 0x1bba: 0x0002, 0x1bbb: 0x0002, 0x1bbc: 0x0002, 0x1bbd: 0x0002, 0x1bbe: 0x0002, 0x1bbf: 0x0002, // Block 0x6f, offset 0x1bc0 0x1bc0: 0x0002, 0x1bc1: 0x0002, 0x1bc2: 0x0002, 0x1bc3: 0x0002, 0x1bc4: 0x0002, 0x1bc5: 0x0002, 0x1bc6: 0x0002, 0x1bc7: 0x0002, 0x1bc8: 0x0002, 0x1bc9: 0x0002, 0x1bca: 0x0002, 0x1bcb: 0x0002, 0x1bcc: 0x0002, 0x1bcd: 0x0002, 0x1bce: 0x0002, 0x1bcf: 0x0002, 0x1bd0: 0x0002, 0x1bd1: 0x0002, 0x1bd2: 0x0002, 0x1bd3: 0x0002, 0x1bd4: 0x0002, 0x1bd5: 0x0002, 0x1bd6: 0x0002, 0x1bd7: 0x0002, 0x1bd8: 0x0002, 0x1bd9: 0x0002, 0x1bda: 0x0002, 0x1bdb: 0x0002, 0x1bdc: 0x0002, 0x1bdd: 0x0002, 0x1bde: 0x0002, 0x1bdf: 0x0002, 0x1be0: 0x0002, 0x1be1: 0x0002, 0x1be2: 0x0002, 0x1be3: 0x0002, 0x1be4: 0x0002, 0x1be5: 0x0002, 0x1be6: 0x0002, 0x1be7: 0x0002, 0x1be8: 0x0002, 0x1be9: 0x0002, 0x1bea: 0x0002, 0x1beb: 0x0002, 0x1bec: 0x0002, 0x1bed: 0x0002, 0x1bee: 0x0002, 0x1bef: 0x0002, 0x1bf0: 0x0002, 0x1bf1: 0x0002, 0x1bf2: 0x0002, 0x1bf3: 0x0002, // Block 0x70, offset 0x1c00 0x1c00: 0x0002, 0x1c01: 0x0002, 0x1c02: 0x0002, 0x1c03: 0x0002, 0x1c04: 0x0002, 0x1c05: 0x0002, 0x1c06: 0x0002, 0x1c07: 0x0002, 0x1c08: 0x0002, 0x1c09: 0x0002, 0x1c0a: 0x0002, 0x1c0b: 0x0002, 0x1c0c: 0x0002, 0x1c0d: 0x0002, 0x1c0e: 0x0002, 0x1c0f: 0x0002, 0x1c10: 0x0002, 0x1c11: 0x0002, 0x1c12: 0x0002, 0x1c13: 0x0002, 0x1c14: 0x0002, 0x1c15: 0x0002, 0x1c30: 0x0002, 0x1c31: 0x0002, 0x1c32: 0x0002, 0x1c33: 0x0002, 0x1c34: 0x0002, 0x1c35: 0x0002, 0x1c36: 0x0002, 0x1c37: 0x0002, 0x1c38: 0x0002, 0x1c39: 0x0002, 0x1c3a: 0x0002, 0x1c3b: 0x0002, 0x1c3c: 0x0002, 0x1c3d: 0x0002, 0x1c3e: 0x0002, 0x1c3f: 0x0002, // Block 0x71, offset 0x1c40 0x1c40: 0x0002, 0x1c41: 0x0002, 0x1c42: 0x0002, 
0x1c43: 0x0002, 0x1c44: 0x0002, 0x1c45: 0x0002, 0x1c46: 0x0002, 0x1c47: 0x0002, 0x1c48: 0x0002, 0x1c49: 0x0002, 0x1c4a: 0x0002, 0x1c4b: 0x0002, 0x1c4c: 0x0002, 0x1c4d: 0x0002, 0x1c4e: 0x0002, 0x1c4f: 0x0002, 0x1c50: 0x0002, 0x1c51: 0x0002, 0x1c52: 0x0002, 0x1c53: 0x0002, 0x1c54: 0x0002, 0x1c55: 0x0002, 0x1c56: 0x0002, 0x1c57: 0x0002, 0x1c58: 0x0002, 0x1c59: 0x0002, 0x1c5a: 0x0002, 0x1c5b: 0x0002, 0x1c5c: 0x0002, 0x1c5d: 0x0002, 0x1c5e: 0x0002, 0x1c5f: 0x0002, 0x1c60: 0x0002, 0x1c61: 0x0002, 0x1c62: 0x0002, 0x1c63: 0x0002, 0x1c64: 0x0002, 0x1c65: 0x0002, 0x1c66: 0x0002, 0x1c67: 0x0002, 0x1c68: 0x0002, 0x1c69: 0x0002, 0x1c6a: 0x0001, 0x1c6b: 0x0001, 0x1c6c: 0x0001, 0x1c6d: 0x0001, 0x1c6e: 0x0002, 0x1c6f: 0x0002, 0x1c70: 0x0002, 0x1c71: 0x0002, 0x1c72: 0x0002, 0x1c73: 0x0002, 0x1c74: 0x0002, 0x1c75: 0x0002, 0x1c76: 0x0002, 0x1c77: 0x0002, 0x1c78: 0x0002, 0x1c79: 0x0002, 0x1c7a: 0x0002, 0x1c7b: 0x0002, 0x1c7c: 0x0002, 0x1c7d: 0x0002, 0x1c7e: 0x0002, // Block 0x72, offset 0x1c80 0x1c81: 0x0002, 0x1c82: 0x0002, 0x1c83: 0x0002, 0x1c84: 0x0002, 0x1c85: 0x0002, 0x1c86: 0x0002, 0x1c87: 0x0002, 0x1c88: 0x0002, 0x1c89: 0x0002, 0x1c8a: 0x0002, 0x1c8b: 0x0002, 0x1c8c: 0x0002, 0x1c8d: 0x0002, 0x1c8e: 0x0002, 0x1c8f: 0x0002, 0x1c90: 0x0002, 0x1c91: 0x0002, 0x1c92: 0x0002, 0x1c93: 0x0002, 0x1c94: 0x0002, 0x1c95: 0x0002, 0x1c96: 0x0002, 0x1c97: 0x0002, 0x1c98: 0x0002, 0x1c99: 0x0002, 0x1c9a: 0x0002, 0x1c9b: 0x0002, 0x1c9c: 0x0002, 0x1c9d: 0x0002, 0x1c9e: 0x0002, 0x1c9f: 0x0002, 0x1ca0: 0x0002, 0x1ca1: 0x0002, 0x1ca2: 0x0002, 0x1ca3: 0x0002, 0x1ca4: 0x0002, 0x1ca5: 0x0002, 0x1ca6: 0x0002, 0x1ca7: 0x0002, 0x1ca8: 0x0002, 0x1ca9: 0x0002, 0x1caa: 0x0002, 0x1cab: 0x0002, 0x1cac: 0x0002, 0x1cad: 0x0002, 0x1cae: 0x0002, 0x1caf: 0x0002, 0x1cb0: 0x0002, 0x1cb1: 0x0002, 0x1cb2: 0x0002, 0x1cb3: 0x0002, 0x1cb4: 0x0002, 0x1cb5: 0x0002, 0x1cb6: 0x0002, 0x1cb7: 0x0002, 0x1cb8: 0x0002, 0x1cb9: 0x0002, 0x1cba: 0x0002, 0x1cbb: 0x0002, 0x1cbc: 0x0002, 0x1cbd: 0x0002, 0x1cbe: 0x0002, 0x1cbf: 0x0002, // 
Block 0x73, offset 0x1cc0 0x1cc0: 0x0002, 0x1cc1: 0x0002, 0x1cc2: 0x0002, 0x1cc3: 0x0002, 0x1cc4: 0x0002, 0x1cc5: 0x0002, 0x1cc6: 0x0002, 0x1cc7: 0x0002, 0x1cc8: 0x0002, 0x1cc9: 0x0002, 0x1cca: 0x0002, 0x1ccb: 0x0002, 0x1ccc: 0x0002, 0x1ccd: 0x0002, 0x1cce: 0x0002, 0x1ccf: 0x0002, 0x1cd0: 0x0002, 0x1cd1: 0x0002, 0x1cd2: 0x0002, 0x1cd3: 0x0002, 0x1cd4: 0x0002, 0x1cd5: 0x0002, 0x1cd6: 0x0002, 0x1cd9: 0x0001, 0x1cda: 0x0001, 0x1cdb: 0x0002, 0x1cdc: 0x0002, 0x1cdd: 0x0002, 0x1cde: 0x0002, 0x1cdf: 0x0002, 0x1ce0: 0x0002, 0x1ce1: 0x0002, 0x1ce2: 0x0002, 0x1ce3: 0x0002, 0x1ce4: 0x0002, 0x1ce5: 0x0002, 0x1ce6: 0x0002, 0x1ce7: 0x0002, 0x1ce8: 0x0002, 0x1ce9: 0x0002, 0x1cea: 0x0002, 0x1ceb: 0x0002, 0x1cec: 0x0002, 0x1ced: 0x0002, 0x1cee: 0x0002, 0x1cef: 0x0002, 0x1cf0: 0x0002, 0x1cf1: 0x0002, 0x1cf2: 0x0002, 0x1cf3: 0x0002, 0x1cf4: 0x0002, 0x1cf5: 0x0002, 0x1cf6: 0x0002, 0x1cf7: 0x0002, 0x1cf8: 0x0002, 0x1cf9: 0x0002, 0x1cfa: 0x0002, 0x1cfb: 0x0002, 0x1cfc: 0x0002, 0x1cfd: 0x0002, 0x1cfe: 0x0002, 0x1cff: 0x0002, // Block 0x74, offset 0x1d00 0x1d05: 0x0002, 0x1d06: 0x0002, 0x1d07: 0x0002, 0x1d08: 0x0002, 0x1d09: 0x0002, 0x1d0a: 0x0002, 0x1d0b: 0x0002, 0x1d0c: 0x0002, 0x1d0d: 0x0002, 0x1d0e: 0x0002, 0x1d0f: 0x0002, 0x1d10: 0x0002, 0x1d11: 0x0002, 0x1d12: 0x0002, 0x1d13: 0x0002, 0x1d14: 0x0002, 0x1d15: 0x0002, 0x1d16: 0x0002, 0x1d17: 0x0002, 0x1d18: 0x0002, 0x1d19: 0x0002, 0x1d1a: 0x0002, 0x1d1b: 0x0002, 0x1d1c: 0x0002, 0x1d1d: 0x0002, 0x1d1e: 0x0002, 0x1d1f: 0x0002, 0x1d20: 0x0002, 0x1d21: 0x0002, 0x1d22: 0x0002, 0x1d23: 0x0002, 0x1d24: 0x0002, 0x1d25: 0x0002, 0x1d26: 0x0002, 0x1d27: 0x0002, 0x1d28: 0x0002, 0x1d29: 0x0002, 0x1d2a: 0x0002, 0x1d2b: 0x0002, 0x1d2c: 0x0002, 0x1d2d: 0x0002, 0x1d2e: 0x0002, 0x1d2f: 0x0002, 0x1d31: 0x0002, 0x1d32: 0x0002, 0x1d33: 0x0002, 0x1d34: 0x0002, 0x1d35: 0x0002, 0x1d36: 0x0002, 0x1d37: 0x0002, 0x1d38: 0x0002, 0x1d39: 0x0002, 0x1d3a: 0x0002, 0x1d3b: 0x0002, 0x1d3c: 0x0002, 0x1d3d: 0x0002, 0x1d3e: 0x0002, 0x1d3f: 0x0002, // Block 0x75, offset 
0x1d40 0x1d40: 0x0002, 0x1d41: 0x0002, 0x1d42: 0x0002, 0x1d43: 0x0002, 0x1d44: 0x0002, 0x1d45: 0x0002, 0x1d46: 0x0002, 0x1d47: 0x0002, 0x1d48: 0x0002, 0x1d49: 0x0002, 0x1d4a: 0x0002, 0x1d4b: 0x0002, 0x1d4c: 0x0002, 0x1d4d: 0x0002, 0x1d4e: 0x0002, 0x1d50: 0x0002, 0x1d51: 0x0002, 0x1d52: 0x0002, 0x1d53: 0x0002, 0x1d54: 0x0002, 0x1d55: 0x0002, 0x1d56: 0x0002, 0x1d57: 0x0002, 0x1d58: 0x0002, 0x1d59: 0x0002, 0x1d5a: 0x0002, 0x1d5b: 0x0002, 0x1d5c: 0x0002, 0x1d5d: 0x0002, 0x1d5e: 0x0002, 0x1d5f: 0x0002, 0x1d60: 0x0002, 0x1d61: 0x0002, 0x1d62: 0x0002, 0x1d63: 0x0002, 0x1d64: 0x0002, 0x1d65: 0x0002, 0x1d66: 0x0002, 0x1d67: 0x0002, 0x1d68: 0x0002, 0x1d69: 0x0002, 0x1d6a: 0x0002, 0x1d6b: 0x0002, 0x1d6c: 0x0002, 0x1d6d: 0x0002, 0x1d6e: 0x0002, 0x1d6f: 0x0002, 0x1d70: 0x0002, 0x1d71: 0x0002, 0x1d72: 0x0002, 0x1d73: 0x0002, 0x1d74: 0x0002, 0x1d75: 0x0002, 0x1d76: 0x0002, 0x1d77: 0x0002, 0x1d78: 0x0002, 0x1d79: 0x0002, 0x1d7a: 0x0002, 0x1d7b: 0x0002, 0x1d7c: 0x0002, 0x1d7d: 0x0002, 0x1d7e: 0x0002, 0x1d7f: 0x0002, // Block 0x76, offset 0x1d80 0x1d80: 0x0002, 0x1d81: 0x0002, 0x1d82: 0x0002, 0x1d83: 0x0002, 0x1d84: 0x0002, 0x1d85: 0x0002, 0x1d86: 0x0002, 0x1d87: 0x0002, 0x1d88: 0x0002, 0x1d89: 0x0002, 0x1d8a: 0x0002, 0x1d8b: 0x0002, 0x1d8c: 0x0002, 0x1d8d: 0x0002, 0x1d8e: 0x0002, 0x1d8f: 0x0002, 0x1d90: 0x0002, 0x1d91: 0x0002, 0x1d92: 0x0002, 0x1d93: 0x0002, 0x1d94: 0x0002, 0x1d95: 0x0002, 0x1d96: 0x0002, 0x1d97: 0x0002, 0x1d98: 0x0002, 0x1d99: 0x0002, 0x1d9a: 0x0002, 0x1d9b: 0x0002, 0x1d9c: 0x0002, 0x1d9d: 0x0002, 0x1d9e: 0x0002, 0x1d9f: 0x0002, 0x1da0: 0x0002, 0x1da1: 0x0002, 0x1da2: 0x0002, 0x1da3: 0x0002, 0x1da4: 0x0002, 0x1da5: 0x0002, 0x1daf: 0x0002, 0x1db0: 0x0002, 0x1db1: 0x0002, 0x1db2: 0x0002, 0x1db3: 0x0002, 0x1db4: 0x0002, 0x1db5: 0x0002, 0x1db6: 0x0002, 0x1db7: 0x0002, 0x1db8: 0x0002, 0x1db9: 0x0002, 0x1dba: 0x0002, 0x1dbb: 0x0002, 0x1dbc: 0x0002, 0x1dbd: 0x0002, 0x1dbe: 0x0002, 0x1dbf: 0x0002, // Block 0x77, offset 0x1dc0 0x1dc0: 0x0002, 0x1dc1: 0x0002, 0x1dc2: 
0x0002, 0x1dc3: 0x0002, 0x1dc4: 0x0002, 0x1dc5: 0x0002, 0x1dc6: 0x0002, 0x1dc7: 0x0002, 0x1dc8: 0x0002, 0x1dc9: 0x0002, 0x1dca: 0x0002, 0x1dcb: 0x0002, 0x1dcc: 0x0002, 0x1dcd: 0x0002, 0x1dce: 0x0002, 0x1dcf: 0x0002, 0x1dd0: 0x0002, 0x1dd1: 0x0002, 0x1dd2: 0x0002, 0x1dd3: 0x0002, 0x1dd4: 0x0002, 0x1dd5: 0x0002, 0x1dd6: 0x0002, 0x1dd7: 0x0002, 0x1dd8: 0x0002, 0x1dd9: 0x0002, 0x1dda: 0x0002, 0x1ddb: 0x0002, 0x1ddc: 0x0002, 0x1ddd: 0x0002, 0x1dde: 0x0002, 0x1de0: 0x0002, 0x1de1: 0x0002, 0x1de2: 0x0002, 0x1de3: 0x0002, 0x1de4: 0x0002, 0x1de5: 0x0002, 0x1de6: 0x0002, 0x1de7: 0x0002, 0x1de8: 0x0002, 0x1de9: 0x0002, 0x1dea: 0x0002, 0x1deb: 0x0002, 0x1dec: 0x0002, 0x1ded: 0x0002, 0x1dee: 0x0002, 0x1def: 0x0002, 0x1df0: 0x0002, 0x1df1: 0x0002, 0x1df2: 0x0002, 0x1df3: 0x0002, 0x1df4: 0x0002, 0x1df5: 0x0002, 0x1df6: 0x0002, 0x1df7: 0x0002, 0x1df8: 0x0002, 0x1df9: 0x0002, 0x1dfa: 0x0002, 0x1dfb: 0x0002, 0x1dfc: 0x0002, 0x1dfd: 0x0002, 0x1dfe: 0x0002, 0x1dff: 0x0002, // Block 0x78, offset 0x1e00 0x1e00: 0x0002, 0x1e01: 0x0002, 0x1e02: 0x0002, 0x1e03: 0x0002, 0x1e04: 0x0002, 0x1e05: 0x0002, 0x1e06: 0x0002, 0x1e07: 0x0002, 0x1e08: 0x0003, 0x1e09: 0x0003, 0x1e0a: 0x0003, 0x1e0b: 0x0003, 0x1e0c: 0x0003, 0x1e0d: 0x0003, 0x1e0e: 0x0003, 0x1e0f: 0x0003, 0x1e10: 0x0002, 0x1e11: 0x0002, 0x1e12: 0x0002, 0x1e13: 0x0002, 0x1e14: 0x0002, 0x1e15: 0x0002, 0x1e16: 0x0002, 0x1e17: 0x0002, 0x1e18: 0x0002, 0x1e19: 0x0002, 0x1e1a: 0x0002, 0x1e1b: 0x0002, 0x1e1c: 0x0002, 0x1e1d: 0x0002, 0x1e1e: 0x0002, 0x1e1f: 0x0002, 0x1e20: 0x0002, 0x1e21: 0x0002, 0x1e22: 0x0002, 0x1e23: 0x0002, 0x1e24: 0x0002, 0x1e25: 0x0002, 0x1e26: 0x0002, 0x1e27: 0x0002, 0x1e28: 0x0002, 0x1e29: 0x0002, 0x1e2a: 0x0002, 0x1e2b: 0x0002, 0x1e2c: 0x0002, 0x1e2d: 0x0002, 0x1e2e: 0x0002, 0x1e2f: 0x0002, 0x1e30: 0x0002, 0x1e31: 0x0002, 0x1e32: 0x0002, 0x1e33: 0x0002, 0x1e34: 0x0002, 0x1e35: 0x0002, 0x1e36: 0x0002, 0x1e37: 0x0002, 0x1e38: 0x0002, 0x1e39: 0x0002, 0x1e3a: 0x0002, 0x1e3b: 0x0002, 0x1e3c: 0x0002, 0x1e3d: 0x0002, 0x1e3e: 
0x0002, 0x1e3f: 0x0002, // Block 0x79, offset 0x1e40 0x1e40: 0x0002, 0x1e41: 0x0002, 0x1e42: 0x0002, 0x1e43: 0x0002, 0x1e44: 0x0002, 0x1e45: 0x0002, 0x1e46: 0x0002, 0x1e47: 0x0002, 0x1e48: 0x0002, 0x1e49: 0x0002, 0x1e4a: 0x0002, 0x1e4b: 0x0002, 0x1e4c: 0x0002, 0x1e50: 0x0002, 0x1e51: 0x0002, 0x1e52: 0x0002, 0x1e53: 0x0002, 0x1e54: 0x0002, 0x1e55: 0x0002, 0x1e56: 0x0002, 0x1e57: 0x0002, 0x1e58: 0x0002, 0x1e59: 0x0002, 0x1e5a: 0x0002, 0x1e5b: 0x0002, 0x1e5c: 0x0002, 0x1e5d: 0x0002, 0x1e5e: 0x0002, 0x1e5f: 0x0002, 0x1e60: 0x0002, 0x1e61: 0x0002, 0x1e62: 0x0002, 0x1e63: 0x0002, 0x1e64: 0x0002, 0x1e65: 0x0002, 0x1e66: 0x0002, 0x1e67: 0x0002, 0x1e68: 0x0002, 0x1e69: 0x0002, 0x1e6a: 0x0002, 0x1e6b: 0x0002, 0x1e6c: 0x0002, 0x1e6d: 0x0002, 0x1e6e: 0x0002, 0x1e6f: 0x0002, 0x1e70: 0x0002, 0x1e71: 0x0002, 0x1e72: 0x0002, 0x1e73: 0x0002, 0x1e74: 0x0002, 0x1e75: 0x0002, 0x1e76: 0x0002, 0x1e77: 0x0002, 0x1e78: 0x0002, 0x1e79: 0x0002, 0x1e7a: 0x0002, 0x1e7b: 0x0002, 0x1e7c: 0x0002, 0x1e7d: 0x0002, 0x1e7e: 0x0002, 0x1e7f: 0x0002, // Block 0x7a, offset 0x1e80 0x1e80: 0x0002, 0x1e81: 0x0002, 0x1e82: 0x0002, 0x1e83: 0x0002, 0x1e84: 0x0002, 0x1e85: 0x0002, 0x1e86: 0x0002, // Block 0x7b, offset 0x1ec0 0x1eef: 0x0001, 0x1ef0: 0x0001, 0x1ef1: 0x0001, 0x1ef2: 0x0001, 0x1ef4: 0x0001, 0x1ef5: 0x0001, 0x1ef6: 0x0001, 0x1ef7: 0x0001, 0x1ef8: 0x0001, 0x1ef9: 0x0001, 0x1efa: 0x0001, 0x1efb: 0x0001, 0x1efc: 0x0001, 0x1efd: 0x0001, // Block 0x7c, offset 0x1f00 0x1f1e: 0x0001, 0x1f1f: 0x0001, // Block 0x7d, offset 0x1f40 0x1f70: 0x0001, 0x1f71: 0x0001, // Block 0x7e, offset 0x1f80 0x1f82: 0x0001, 0x1f86: 0x0001, 0x1f8b: 0x0001, 0x1fa5: 0x0001, 0x1fa6: 0x0001, 0x1fac: 0x0001, // Block 0x7f, offset 0x1fc0 0x1fc4: 0x0001, 0x1fc5: 0x0001, 0x1fe0: 0x0001, 0x1fe1: 0x0001, 0x1fe2: 0x0001, 0x1fe3: 0x0001, 0x1fe4: 0x0001, 0x1fe5: 0x0001, 0x1fe6: 0x0001, 0x1fe7: 0x0001, 0x1fe8: 0x0001, 0x1fe9: 0x0001, 0x1fea: 0x0001, 0x1feb: 0x0001, 0x1fec: 0x0001, 0x1fed: 0x0001, 0x1fee: 0x0001, 0x1fef: 0x0001, 0x1ff0: 
0x0001, 0x1ff1: 0x0001, 0x1fff: 0x0001, // Block 0x80, offset 0x2000 0x2026: 0x0001, 0x2027: 0x0001, 0x2028: 0x0001, 0x2029: 0x0001, 0x202a: 0x0001, 0x202b: 0x0001, 0x202c: 0x0001, 0x202d: 0x0001, // Block 0x81, offset 0x2040 0x2047: 0x0001, 0x2048: 0x0001, 0x2049: 0x0001, 0x204a: 0x0001, 0x204b: 0x0001, 0x204c: 0x0001, 0x204d: 0x0001, 0x204e: 0x0001, 0x204f: 0x0001, 0x2050: 0x0001, 0x2051: 0x0001, 0x2060: 0x0002, 0x2061: 0x0002, 0x2062: 0x0002, 0x2063: 0x0002, 0x2064: 0x0002, 0x2065: 0x0002, 0x2066: 0x0002, 0x2067: 0x0002, 0x2068: 0x0002, 0x2069: 0x0002, 0x206a: 0x0002, 0x206b: 0x0002, 0x206c: 0x0002, 0x206d: 0x0002, 0x206e: 0x0002, 0x206f: 0x0002, 0x2070: 0x0002, 0x2071: 0x0002, 0x2072: 0x0002, 0x2073: 0x0002, 0x2074: 0x0002, 0x2075: 0x0002, 0x2076: 0x0002, 0x2077: 0x0002, 0x2078: 0x0002, 0x2079: 0x0002, 0x207a: 0x0002, 0x207b: 0x0002, 0x207c: 0x0002, // Block 0x82, offset 0x2080 0x2080: 0x0001, 0x2081: 0x0001, 0x2082: 0x0001, 0x20b3: 0x0001, 0x20b6: 0x0001, 0x20b7: 0x0001, 0x20b8: 0x0001, 0x20b9: 0x0001, 0x20bc: 0x0001, 0x20bd: 0x0001, // Block 0x83, offset 0x20c0 0x20e5: 0x0001, // Block 0x84, offset 0x2100 0x2129: 0x0001, 0x212a: 0x0001, 0x212b: 0x0001, 0x212c: 0x0001, 0x212d: 0x0001, 0x212e: 0x0001, 0x2131: 0x0001, 0x2132: 0x0001, 0x2135: 0x0001, 0x2136: 0x0001, // Block 0x85, offset 0x2140 0x2143: 0x0001, 0x214c: 0x0001, 0x217c: 0x0001, // Block 0x86, offset 0x2180 0x21b0: 0x0001, 0x21b2: 0x0001, 0x21b3: 0x0001, 0x21b4: 0x0001, 0x21b7: 0x0001, 0x21b8: 0x0001, 0x21be: 0x0001, 0x21bf: 0x0001, // Block 0x87, offset 0x21c0 0x21c1: 0x0001, 0x21ec: 0x0001, 0x21ed: 0x0001, 0x21f6: 0x0001, // Block 0x88, offset 0x2200 0x2225: 0x0001, 0x2228: 0x0001, 0x222d: 0x0001, // Block 0x89, offset 0x2240 0x2240: 0x0002, 0x2241: 0x0002, 0x2242: 0x0002, 0x2243: 0x0002, 0x2244: 0x0002, 0x2245: 0x0002, 0x2246: 0x0002, 0x2247: 0x0002, 0x2248: 0x0002, 0x2249: 0x0002, 0x224a: 0x0002, 0x224b: 0x0002, 0x224c: 0x0002, 0x224d: 0x0002, 0x224e: 0x0002, 0x224f: 0x0002, 0x2250: 0x0002, 
0x2251: 0x0002, 0x2252: 0x0002, 0x2253: 0x0002, 0x2254: 0x0002, 0x2255: 0x0002, 0x2256: 0x0002, 0x2257: 0x0002, 0x2258: 0x0002, 0x2259: 0x0002, 0x225a: 0x0002, 0x225b: 0x0002, 0x225c: 0x0002, 0x225d: 0x0002, 0x225e: 0x0002, 0x225f: 0x0002, 0x2260: 0x0002, 0x2261: 0x0002, 0x2262: 0x0002, 0x2263: 0x0002, // Block 0x8a, offset 0x2280 0x229e: 0x0001, // Block 0x8b, offset 0x22c0 0x22c0: 0x0001, 0x22c1: 0x0001, 0x22c2: 0x0001, 0x22c3: 0x0001, 0x22c4: 0x0001, 0x22c5: 0x0001, 0x22c6: 0x0001, 0x22c7: 0x0001, 0x22c8: 0x0001, 0x22c9: 0x0001, 0x22ca: 0x0001, 0x22cb: 0x0001, 0x22cc: 0x0001, 0x22cd: 0x0001, 0x22ce: 0x0001, 0x22cf: 0x0001, 0x22d0: 0x0002, 0x22d1: 0x0002, 0x22d2: 0x0002, 0x22d3: 0x0002, 0x22d4: 0x0002, 0x22d5: 0x0002, 0x22d6: 0x0002, 0x22d7: 0x0002, 0x22d8: 0x0002, 0x22d9: 0x0002, 0x22e0: 0x0001, 0x22e1: 0x0001, 0x22e2: 0x0001, 0x22e3: 0x0001, 0x22e4: 0x0001, 0x22e5: 0x0001, 0x22e6: 0x0001, 0x22e7: 0x0001, 0x22e8: 0x0001, 0x22e9: 0x0001, 0x22ea: 0x0001, 0x22eb: 0x0001, 0x22ec: 0x0001, 0x22ed: 0x0001, 0x22ee: 0x0001, 0x22ef: 0x0001, 0x22f0: 0x0002, 0x22f1: 0x0002, 0x22f2: 0x0002, 0x22f3: 0x0002, 0x22f4: 0x0002, 0x22f5: 0x0002, 0x22f6: 0x0002, 0x22f7: 0x0002, 0x22f8: 0x0002, 0x22f9: 0x0002, 0x22fa: 0x0002, 0x22fb: 0x0002, 0x22fc: 0x0002, 0x22fd: 0x0002, 0x22fe: 0x0002, 0x22ff: 0x0002, // Block 0x8c, offset 0x2300 0x2300: 0x0002, 0x2301: 0x0002, 0x2302: 0x0002, 0x2303: 0x0002, 0x2304: 0x0002, 0x2305: 0x0002, 0x2306: 0x0002, 0x2307: 0x0002, 0x2308: 0x0002, 0x2309: 0x0002, 0x230a: 0x0002, 0x230b: 0x0002, 0x230c: 0x0002, 0x230d: 0x0002, 0x230e: 0x0002, 0x230f: 0x0002, 0x2310: 0x0002, 0x2311: 0x0002, 0x2312: 0x0002, 0x2314: 0x0002, 0x2315: 0x0002, 0x2316: 0x0002, 0x2317: 0x0002, 0x2318: 0x0002, 0x2319: 0x0002, 0x231a: 0x0002, 0x231b: 0x0002, 0x231c: 0x0002, 0x231d: 0x0002, 0x231e: 0x0002, 0x231f: 0x0002, 0x2320: 0x0002, 0x2321: 0x0002, 0x2322: 0x0002, 0x2323: 0x0002, 0x2324: 0x0002, 0x2325: 0x0002, 0x2326: 0x0002, 0x2328: 0x0002, 0x2329: 0x0002, 0x232a: 0x0002, 0x232b: 
0x0002, // Block 0x8d, offset 0x2340 0x2340: 0x0002, 0x2341: 0x0002, 0x2342: 0x0002, 0x2343: 0x0002, 0x2344: 0x0002, 0x2345: 0x0002, 0x2346: 0x0002, 0x2347: 0x0002, 0x2348: 0x0002, 0x2349: 0x0002, 0x234a: 0x0002, 0x234b: 0x0002, 0x234c: 0x0002, 0x234d: 0x0002, 0x234e: 0x0002, 0x234f: 0x0002, 0x2350: 0x0002, 0x2351: 0x0002, 0x2352: 0x0002, 0x2353: 0x0002, 0x2354: 0x0002, 0x2355: 0x0002, 0x2356: 0x0002, 0x2357: 0x0002, 0x2358: 0x0002, 0x2359: 0x0002, 0x235a: 0x0002, 0x235b: 0x0002, 0x235c: 0x0002, 0x235d: 0x0002, 0x235e: 0x0002, 0x235f: 0x0002, 0x2360: 0x0002, // Block 0x8e, offset 0x2380 0x23a0: 0x0002, 0x23a1: 0x0002, 0x23a2: 0x0002, 0x23a3: 0x0002, 0x23a4: 0x0002, 0x23a5: 0x0002, 0x23a6: 0x0002, 0x23b9: 0x0001, 0x23ba: 0x0001, 0x23bb: 0x0001, 0x23be: 0x0001, 0x23bf: 0x0001, // Block 0x8f, offset 0x23c0 0x23fd: 0x0001, // Block 0x90, offset 0x2400 0x2420: 0x0001, // Block 0x91, offset 0x2440 0x2476: 0x0001, 0x2477: 0x0001, 0x2478: 0x0001, 0x2479: 0x0001, 0x247a: 0x0001, // Block 0x92, offset 0x2480 0x2481: 0x0001, 0x2482: 0x0001, 0x2483: 0x0001, 0x2485: 0x0001, 0x2486: 0x0001, 0x248c: 0x0001, 0x248d: 0x0001, 0x248e: 0x0001, 0x248f: 0x0001, 0x24b8: 0x0001, 0x24b9: 0x0001, 0x24ba: 0x0001, 0x24bf: 0x0001, // Block 0x93, offset 0x24c0 0x24e5: 0x0001, 0x24e6: 0x0001, // Block 0x94, offset 0x2500 0x2524: 0x0001, 0x2525: 0x0001, 0x2526: 0x0001, 0x2527: 0x0001, // Block 0x95, offset 0x2540 0x256b: 0x0001, 0x256c: 0x0001, // Block 0x96, offset 0x2580 0x25bd: 0x0001, 0x25be: 0x0001, 0x25bf: 0x0001, // Block 0x97, offset 0x25c0 0x25c6: 0x0001, 0x25c7: 0x0001, 0x25c8: 0x0001, 0x25c9: 0x0001, 0x25ca: 0x0001, 0x25cb: 0x0001, 0x25cc: 0x0001, 0x25cd: 0x0001, 0x25ce: 0x0001, 0x25cf: 0x0001, 0x25d0: 0x0001, // Block 0x98, offset 0x2600 0x2602: 0x0001, 0x2603: 0x0001, 0x2604: 0x0001, 0x2605: 0x0001, // Block 0x99, offset 0x2640 0x2641: 0x0001, 0x2678: 0x0001, 0x2679: 0x0001, 0x267a: 0x0001, 0x267b: 0x0001, 0x267c: 0x0001, 0x267d: 0x0001, 0x267e: 0x0001, 0x267f: 0x0001, // Block 0x9a, 
offset 0x2680 0x2680: 0x0001, 0x2681: 0x0001, 0x2682: 0x0001, 0x2683: 0x0001, 0x2684: 0x0001, 0x2685: 0x0001, 0x2686: 0x0001, 0x26b0: 0x0001, 0x26b3: 0x0001, 0x26b4: 0x0001, 0x26bf: 0x0001, // Block 0x9b, offset 0x26c0 0x26c0: 0x0001, 0x26c1: 0x0001, 0x26f3: 0x0001, 0x26f4: 0x0001, 0x26f5: 0x0001, 0x26f6: 0x0001, 0x26f9: 0x0001, 0x26fa: 0x0001, 0x26fd: 0x0001, // Block 0x9c, offset 0x2700 0x2702: 0x0001, 0x270d: 0x0001, // Block 0x9d, offset 0x2740 0x2740: 0x0001, 0x2741: 0x0001, 0x2742: 0x0001, 0x2767: 0x0001, 0x2768: 0x0001, 0x2769: 0x0001, 0x276a: 0x0001, 0x276b: 0x0001, 0x276d: 0x0001, 0x276e: 0x0001, 0x276f: 0x0001, 0x2770: 0x0001, 0x2771: 0x0001, 0x2772: 0x0001, 0x2773: 0x0001, 0x2774: 0x0001, // Block 0x9e, offset 0x2780 0x27b3: 0x0001, // Block 0x9f, offset 0x27c0 0x27c0: 0x0001, 0x27c1: 0x0001, 0x27f6: 0x0001, 0x27f7: 0x0001, 0x27f8: 0x0001, 0x27f9: 0x0001, 0x27fa: 0x0001, 0x27fb: 0x0001, 0x27fc: 0x0001, 0x27fd: 0x0001, 0x27fe: 0x0001, // Block 0xa0, offset 0x2800 0x2809: 0x0001, 0x280a: 0x0001, 0x280b: 0x0001, 0x280c: 0x0001, 0x280f: 0x0001, // Block 0xa1, offset 0x2840 0x286f: 0x0001, 0x2870: 0x0001, 0x2871: 0x0001, 0x2874: 0x0001, 0x2876: 0x0001, 0x2877: 0x0001, 0x287e: 0x0001, // Block 0xa2, offset 0x2880 0x289f: 0x0001, 0x28a3: 0x0001, 0x28a4: 0x0001, 0x28a5: 0x0001, 0x28a6: 0x0001, 0x28a7: 0x0001, 0x28a8: 0x0001, 0x28a9: 0x0001, 0x28aa: 0x0001, // Block 0xa3, offset 0x28c0 0x28c0: 0x0001, 0x28e6: 0x0001, 0x28e7: 0x0001, 0x28e8: 0x0001, 0x28e9: 0x0001, 0x28ea: 0x0001, 0x28eb: 0x0001, 0x28ec: 0x0001, 0x28f0: 0x0001, 0x28f1: 0x0001, 0x28f2: 0x0001, 0x28f3: 0x0001, 0x28f4: 0x0001, // Block 0xa4, offset 0x2900 0x2938: 0x0001, 0x2939: 0x0001, 0x293a: 0x0001, 0x293b: 0x0001, 0x293c: 0x0001, 0x293d: 0x0001, 0x293e: 0x0001, 0x293f: 0x0001, // Block 0xa5, offset 0x2940 0x2942: 0x0001, 0x2943: 0x0001, 0x2944: 0x0001, 0x2946: 0x0001, 0x295e: 0x0001, // Block 0xa6, offset 0x2980 0x29b3: 0x0001, 0x29b4: 0x0001, 0x29b5: 0x0001, 0x29b6: 0x0001, 0x29b7: 0x0001, 
0x29b8: 0x0001, 0x29ba: 0x0001, 0x29bf: 0x0001, // Block 0xa7, offset 0x29c0 0x29c0: 0x0001, 0x29c2: 0x0001, 0x29c3: 0x0001, // Block 0xa8, offset 0x2a00 0x2a32: 0x0001, 0x2a33: 0x0001, 0x2a34: 0x0001, 0x2a35: 0x0001, 0x2a3c: 0x0001, 0x2a3d: 0x0001, 0x2a3f: 0x0001, // Block 0xa9, offset 0x2a40 0x2a40: 0x0001, 0x2a5c: 0x0001, 0x2a5d: 0x0001, // Block 0xaa, offset 0x2a80 0x2ab3: 0x0001, 0x2ab4: 0x0001, 0x2ab5: 0x0001, 0x2ab6: 0x0001, 0x2ab7: 0x0001, 0x2ab8: 0x0001, 0x2ab9: 0x0001, 0x2aba: 0x0001, 0x2abd: 0x0001, 0x2abf: 0x0001, // Block 0xab, offset 0x2ac0 0x2ac0: 0x0001, // Block 0xac, offset 0x2b00 0x2b2b: 0x0001, 0x2b2d: 0x0001, 0x2b30: 0x0001, 0x2b31: 0x0001, 0x2b32: 0x0001, 0x2b33: 0x0001, 0x2b34: 0x0001, 0x2b35: 0x0001, 0x2b37: 0x0001, // Block 0xad, offset 0x2b40 0x2b5d: 0x0001, 0x2b5e: 0x0001, 0x2b5f: 0x0001, 0x2b62: 0x0001, 0x2b63: 0x0001, 0x2b64: 0x0001, 0x2b65: 0x0001, 0x2b67: 0x0001, 0x2b68: 0x0001, 0x2b69: 0x0001, 0x2b6a: 0x0001, 0x2b6b: 0x0001, // Block 0xae, offset 0x2b80 0x2baf: 0x0001, 0x2bb0: 0x0001, 0x2bb1: 0x0001, 0x2bb2: 0x0001, 0x2bb3: 0x0001, 0x2bb4: 0x0001, 0x2bb5: 0x0001, 0x2bb6: 0x0001, 0x2bb7: 0x0001, 0x2bb9: 0x0001, 0x2bba: 0x0001, // Block 0xaf, offset 0x2bc0 0x2bfb: 0x0001, 0x2bfc: 0x0001, 0x2bfe: 0x0001, // Block 0xb0, offset 0x2c00 0x2c03: 0x0001, // Block 0xb1, offset 0x2c40 0x2c54: 0x0001, 0x2c55: 0x0001, 0x2c56: 0x0001, 0x2c57: 0x0001, 0x2c5a: 0x0001, 0x2c5b: 0x0001, 0x2c60: 0x0001, // Block 0xb2, offset 0x2c80 0x2c81: 0x0001, 0x2c82: 0x0001, 0x2c83: 0x0001, 0x2c84: 0x0001, 0x2c85: 0x0001, 0x2c86: 0x0001, 0x2c87: 0x0001, 0x2c88: 0x0001, 0x2c89: 0x0001, 0x2c8a: 0x0001, 0x2cb3: 0x0001, 0x2cb4: 0x0001, 0x2cb5: 0x0001, 0x2cb6: 0x0001, 0x2cb7: 0x0001, 0x2cb8: 0x0001, 0x2cbb: 0x0001, 0x2cbc: 0x0001, 0x2cbd: 0x0001, 0x2cbe: 0x0001, // Block 0xb3, offset 0x2cc0 0x2cc7: 0x0001, 0x2cd1: 0x0001, 0x2cd2: 0x0001, 0x2cd3: 0x0001, 0x2cd4: 0x0001, 0x2cd5: 0x0001, 0x2cd6: 0x0001, 0x2cd9: 0x0001, 0x2cda: 0x0001, 0x2cdb: 0x0001, // Block 0xb4, offset 
0x2d00 0x2d0a: 0x0001, 0x2d0b: 0x0001, 0x2d0c: 0x0001, 0x2d0d: 0x0001, 0x2d0e: 0x0001, 0x2d0f: 0x0001, 0x2d10: 0x0001, 0x2d11: 0x0001, 0x2d12: 0x0001, 0x2d13: 0x0001, 0x2d14: 0x0001, 0x2d15: 0x0001, 0x2d16: 0x0001, 0x2d18: 0x0001, 0x2d19: 0x0001, // Block 0xb5, offset 0x2d40 0x2d70: 0x0001, 0x2d71: 0x0001, 0x2d72: 0x0001, 0x2d73: 0x0001, 0x2d74: 0x0001, 0x2d75: 0x0001, 0x2d76: 0x0001, 0x2d78: 0x0001, 0x2d79: 0x0001, 0x2d7a: 0x0001, 0x2d7b: 0x0001, 0x2d7c: 0x0001, 0x2d7d: 0x0001, 0x2d7f: 0x0001, // Block 0xb6, offset 0x2d80 0x2d92: 0x0001, 0x2d93: 0x0001, 0x2d94: 0x0001, 0x2d95: 0x0001, 0x2d96: 0x0001, 0x2d97: 0x0001, 0x2d98: 0x0001, 0x2d99: 0x0001, 0x2d9a: 0x0001, 0x2d9b: 0x0001, 0x2d9c: 0x0001, 0x2d9d: 0x0001, 0x2d9e: 0x0001, 0x2d9f: 0x0001, 0x2da0: 0x0001, 0x2da1: 0x0001, 0x2da2: 0x0001, 0x2da3: 0x0001, 0x2da4: 0x0001, 0x2da5: 0x0001, 0x2da6: 0x0001, 0x2da7: 0x0001, 0x2daa: 0x0001, 0x2dab: 0x0001, 0x2dac: 0x0001, 0x2dad: 0x0001, 0x2dae: 0x0001, 0x2daf: 0x0001, 0x2db0: 0x0001, 0x2db2: 0x0001, 0x2db3: 0x0001, 0x2db5: 0x0001, 0x2db6: 0x0001, // Block 0xb7, offset 0x2dc0 0x2df1: 0x0001, 0x2df2: 0x0001, 0x2df3: 0x0001, 0x2df4: 0x0001, 0x2df5: 0x0001, 0x2df6: 0x0001, 0x2dfa: 0x0001, 0x2dfc: 0x0001, 0x2dfd: 0x0001, 0x2dff: 0x0001, // Block 0xb8, offset 0x2e00 0x2e00: 0x0001, 0x2e01: 0x0001, 0x2e02: 0x0001, 0x2e03: 0x0001, 0x2e04: 0x0001, 0x2e05: 0x0001, 0x2e07: 0x0001, // Block 0xb9, offset 0x2e40 0x2e50: 0x0001, 0x2e51: 0x0001, 0x2e55: 0x0001, 0x2e57: 0x0001, // Block 0xba, offset 0x2e80 0x2eb3: 0x0001, 0x2eb4: 0x0001, // Block 0xbb, offset 0x2ec0 0x2ec0: 0x0001, 0x2ec1: 0x0001, 0x2ef6: 0x0001, 0x2ef7: 0x0001, 0x2ef8: 0x0001, 0x2ef9: 0x0001, 0x2efa: 0x0001, // Block 0xbc, offset 0x2f00 0x2f00: 0x0001, 0x2f02: 0x0001, // Block 0xbd, offset 0x2f40 0x2f40: 0x0001, 0x2f47: 0x0001, 0x2f48: 0x0001, 0x2f49: 0x0001, 0x2f4a: 0x0001, 0x2f4b: 0x0001, 0x2f4c: 0x0001, 0x2f4d: 0x0001, 0x2f4e: 0x0001, 0x2f4f: 0x0001, 0x2f50: 0x0001, 0x2f51: 0x0001, 0x2f52: 0x0001, 0x2f53: 0x0001, 
0x2f54: 0x0001, 0x2f55: 0x0001, // Block 0xbe, offset 0x2f80 0x2fb0: 0x0001, 0x2fb1: 0x0001, 0x2fb2: 0x0001, 0x2fb3: 0x0001, 0x2fb4: 0x0001, // Block 0xbf, offset 0x2fc0 0x2ff0: 0x0001, 0x2ff1: 0x0001, 0x2ff2: 0x0001, 0x2ff3: 0x0001, 0x2ff4: 0x0001, 0x2ff5: 0x0001, 0x2ff6: 0x0001, // Block 0xc0, offset 0x3000 0x300f: 0x0001, // Block 0xc1, offset 0x3040 0x304f: 0x0001, 0x3050: 0x0001, 0x3051: 0x0001, 0x3052: 0x0001, // Block 0xc2, offset 0x3080 0x30a0: 0x0002, 0x30a1: 0x0002, 0x30a2: 0x0002, 0x30a3: 0x0002, 0x30a4: 0x0001, 0x30b0: 0x0002, 0x30b1: 0x0002, 0x30b2: 0x0002, 0x30b3: 0x0002, 0x30b4: 0x0002, 0x30b5: 0x0002, 0x30b6: 0x0002, // Block 0xc3, offset 0x30c0 0x30c0: 0x0002, 0x30c1: 0x0002, 0x30c2: 0x0002, 0x30c3: 0x0002, 0x30c4: 0x0002, 0x30c5: 0x0002, 0x30c6: 0x0002, 0x30c7: 0x0002, 0x30c8: 0x0002, 0x30c9: 0x0002, 0x30ca: 0x0002, 0x30cb: 0x0002, 0x30cc: 0x0002, 0x30cd: 0x0002, 0x30ce: 0x0002, 0x30cf: 0x0002, 0x30d0: 0x0002, 0x30d1: 0x0002, 0x30d2: 0x0002, 0x30d3: 0x0002, 0x30d4: 0x0002, 0x30d5: 0x0002, 0x30ff: 0x0002, // Block 0xc4, offset 0x3100 0x3100: 0x0002, 0x3101: 0x0002, 0x3102: 0x0002, 0x3103: 0x0002, 0x3104: 0x0002, 0x3105: 0x0002, 0x3106: 0x0002, 0x3107: 0x0002, 0x3108: 0x0002, 0x3109: 0x0002, 0x310a: 0x0002, 0x310b: 0x0002, 0x310c: 0x0002, 0x310d: 0x0002, 0x310e: 0x0002, 0x310f: 0x0002, 0x3110: 0x0002, 0x3111: 0x0002, 0x3112: 0x0002, 0x3113: 0x0002, 0x3114: 0x0002, 0x3115: 0x0002, 0x3116: 0x0002, 0x3117: 0x0002, 0x3118: 0x0002, 0x3119: 0x0002, 0x311a: 0x0002, 0x311b: 0x0002, 0x311c: 0x0002, 0x311d: 0x0002, 0x311e: 0x0002, // Block 0xc5, offset 0x3140 0x3140: 0x0002, 0x3141: 0x0002, 0x3142: 0x0002, 0x3143: 0x0002, 0x3144: 0x0002, 0x3145: 0x0002, 0x3146: 0x0002, 0x3147: 0x0002, 0x3148: 0x0002, 0x3149: 0x0002, 0x314a: 0x0002, 0x314b: 0x0002, 0x314c: 0x0002, 0x314d: 0x0002, 0x314e: 0x0002, 0x314f: 0x0002, 0x3150: 0x0002, 0x3151: 0x0002, 0x3152: 0x0002, 0x3153: 0x0002, 0x3154: 0x0002, 0x3155: 0x0002, 0x3156: 0x0002, 0x3157: 0x0002, 0x3158: 0x0002, 0x3159: 
0x0002, 0x315a: 0x0002, 0x315b: 0x0002, 0x315c: 0x0002, 0x315d: 0x0002, 0x315e: 0x0002, 0x315f: 0x0002, 0x3160: 0x0002, 0x3161: 0x0002, 0x3162: 0x0002, 0x3163: 0x0002, 0x3164: 0x0002, 0x3165: 0x0002, 0x3166: 0x0002, 0x3167: 0x0002, 0x3168: 0x0002, 0x3169: 0x0002, 0x316a: 0x0002, 0x316b: 0x0002, 0x316c: 0x0002, 0x316d: 0x0002, 0x316e: 0x0002, 0x316f: 0x0002, 0x3170: 0x0002, 0x3171: 0x0002, 0x3172: 0x0002, // Block 0xc6, offset 0x3180 0x31b0: 0x0002, 0x31b1: 0x0002, 0x31b2: 0x0002, 0x31b3: 0x0002, 0x31b5: 0x0002, 0x31b6: 0x0002, 0x31b7: 0x0002, 0x31b8: 0x0002, 0x31b9: 0x0002, 0x31ba: 0x0002, 0x31bb: 0x0002, 0x31bd: 0x0002, 0x31be: 0x0002, // Block 0xc7, offset 0x31c0 0x31c0: 0x0002, 0x31c1: 0x0002, 0x31c2: 0x0002, 0x31c3: 0x0002, 0x31c4: 0x0002, 0x31c5: 0x0002, 0x31c6: 0x0002, 0x31c7: 0x0002, 0x31c8: 0x0002, 0x31c9: 0x0002, 0x31ca: 0x0002, 0x31cb: 0x0002, 0x31cc: 0x0002, 0x31cd: 0x0002, 0x31ce: 0x0002, 0x31cf: 0x0002, 0x31d0: 0x0002, 0x31d1: 0x0002, 0x31d2: 0x0002, 0x31d3: 0x0002, 0x31d4: 0x0002, 0x31d5: 0x0002, 0x31d6: 0x0002, 0x31d7: 0x0002, 0x31d8: 0x0002, 0x31d9: 0x0002, 0x31da: 0x0002, 0x31db: 0x0002, 0x31dc: 0x0002, 0x31dd: 0x0002, 0x31de: 0x0002, 0x31df: 0x0002, 0x31e0: 0x0002, 0x31e1: 0x0002, 0x31e2: 0x0002, 0x31f2: 0x0002, // Block 0xc8, offset 0x3200 0x3210: 0x0002, 0x3211: 0x0002, 0x3212: 0x0002, 0x3215: 0x0002, 0x3224: 0x0002, 0x3225: 0x0002, 0x3226: 0x0002, 0x3227: 0x0002, 0x3230: 0x0002, 0x3231: 0x0002, 0x3232: 0x0002, 0x3233: 0x0002, 0x3234: 0x0002, 0x3235: 0x0002, 0x3236: 0x0002, 0x3237: 0x0002, 0x3238: 0x0002, 0x3239: 0x0002, 0x323a: 0x0002, 0x323b: 0x0002, 0x323c: 0x0002, 0x323d: 0x0002, 0x323e: 0x0002, 0x323f: 0x0002, // Block 0xc9, offset 0x3240 0x3240: 0x0002, 0x3241: 0x0002, 0x3242: 0x0002, 0x3243: 0x0002, 0x3244: 0x0002, 0x3245: 0x0002, 0x3246: 0x0002, 0x3247: 0x0002, 0x3248: 0x0002, 0x3249: 0x0002, 0x324a: 0x0002, 0x324b: 0x0002, 0x324c: 0x0002, 0x324d: 0x0002, 0x324e: 0x0002, 0x324f: 0x0002, 0x3250: 0x0002, 0x3251: 0x0002, 0x3252: 0x0002, 
0x3253: 0x0002, 0x3254: 0x0002, 0x3255: 0x0002, 0x3256: 0x0002, 0x3257: 0x0002, 0x3258: 0x0002, 0x3259: 0x0002, 0x325a: 0x0002, 0x325b: 0x0002, 0x325c: 0x0002, 0x325d: 0x0002, 0x325e: 0x0002, 0x325f: 0x0002, 0x3260: 0x0002, 0x3261: 0x0002, 0x3262: 0x0002, 0x3263: 0x0002, 0x3264: 0x0002, 0x3265: 0x0002, 0x3266: 0x0002, 0x3267: 0x0002, 0x3268: 0x0002, 0x3269: 0x0002, 0x326a: 0x0002, 0x326b: 0x0002, 0x326c: 0x0002, 0x326d: 0x0002, 0x326e: 0x0002, 0x326f: 0x0002, 0x3270: 0x0002, 0x3271: 0x0002, 0x3272: 0x0002, 0x3273: 0x0002, 0x3274: 0x0002, 0x3275: 0x0002, 0x3276: 0x0002, 0x3277: 0x0002, 0x3278: 0x0002, 0x3279: 0x0002, 0x327a: 0x0002, 0x327b: 0x0002, // Block 0xca, offset 0x3280 0x329d: 0x0001, 0x329e: 0x0001, 0x32a0: 0x0001, 0x32a1: 0x0001, 0x32a2: 0x0001, 0x32a3: 0x0001, // Block 0xcb, offset 0x32c0 0x32c0: 0x0001, 0x32c1: 0x0001, 0x32c2: 0x0001, 0x32c3: 0x0001, 0x32c4: 0x0001, 0x32c5: 0x0001, 0x32c6: 0x0001, 0x32c7: 0x0001, 0x32c8: 0x0001, 0x32c9: 0x0001, 0x32ca: 0x0001, 0x32cb: 0x0001, 0x32cc: 0x0001, 0x32cd: 0x0001, 0x32ce: 0x0001, 0x32cf: 0x0001, 0x32d0: 0x0001, 0x32d1: 0x0001, 0x32d2: 0x0001, 0x32d3: 0x0001, 0x32d4: 0x0001, 0x32d5: 0x0001, 0x32d6: 0x0001, 0x32d7: 0x0001, 0x32d8: 0x0001, 0x32d9: 0x0001, 0x32da: 0x0001, 0x32db: 0x0001, 0x32dc: 0x0001, 0x32dd: 0x0001, 0x32de: 0x0001, 0x32df: 0x0001, 0x32e0: 0x0001, 0x32e1: 0x0001, 0x32e2: 0x0001, 0x32e3: 0x0001, 0x32e4: 0x0001, 0x32e5: 0x0001, 0x32e6: 0x0001, 0x32e7: 0x0001, 0x32e8: 0x0001, 0x32e9: 0x0001, 0x32ea: 0x0001, 0x32eb: 0x0001, 0x32ec: 0x0001, 0x32ed: 0x0001, 0x32f0: 0x0001, 0x32f1: 0x0001, 0x32f2: 0x0001, 0x32f3: 0x0001, 0x32f4: 0x0001, 0x32f5: 0x0001, 0x32f6: 0x0001, 0x32f7: 0x0001, 0x32f8: 0x0001, 0x32f9: 0x0001, 0x32fa: 0x0001, 0x32fb: 0x0001, 0x32fc: 0x0001, 0x32fd: 0x0001, 0x32fe: 0x0001, 0x32ff: 0x0001, // Block 0xcc, offset 0x3300 0x3300: 0x0001, 0x3301: 0x0001, 0x3302: 0x0001, 0x3303: 0x0001, 0x3304: 0x0001, 0x3305: 0x0001, 0x3306: 0x0001, // Block 0xcd, offset 0x3340 0x3367: 0x0001, 0x3368: 
0x0001, 0x3369: 0x0001, 0x3373: 0x0001, 0x3374: 0x0001, 0x3375: 0x0001, 0x3376: 0x0001, 0x3377: 0x0001, 0x3378: 0x0001, 0x3379: 0x0001, 0x337a: 0x0001, 0x337b: 0x0001, 0x337c: 0x0001, 0x337d: 0x0001, 0x337e: 0x0001, 0x337f: 0x0001, // Block 0xce, offset 0x3380 0x3380: 0x0001, 0x3381: 0x0001, 0x3382: 0x0001, 0x3385: 0x0001, 0x3386: 0x0001, 0x3387: 0x0001, 0x3388: 0x0001, 0x3389: 0x0001, 0x338a: 0x0001, 0x338b: 0x0001, 0x33aa: 0x0001, 0x33ab: 0x0001, 0x33ac: 0x0001, 0x33ad: 0x0001, // Block 0xcf, offset 0x33c0 0x33c2: 0x0001, 0x33c3: 0x0001, 0x33c4: 0x0001, // Block 0xd0, offset 0x3400 0x3400: 0x0002, 0x3401: 0x0002, 0x3402: 0x0002, 0x3403: 0x0002, 0x3404: 0x0002, 0x3405: 0x0002, 0x3406: 0x0002, 0x3407: 0x0002, 0x3408: 0x0002, 0x3409: 0x0002, 0x340a: 0x0002, 0x340b: 0x0002, 0x340c: 0x0002, 0x340d: 0x0002, 0x340e: 0x0002, 0x340f: 0x0002, 0x3410: 0x0002, 0x3411: 0x0002, 0x3412: 0x0002, 0x3413: 0x0002, 0x3414: 0x0002, 0x3415: 0x0002, 0x3416: 0x0002, 0x3420: 0x0002, 0x3421: 0x0002, 0x3422: 0x0002, 0x3423: 0x0002, 0x3424: 0x0002, 0x3425: 0x0002, 0x3426: 0x0002, 0x3427: 0x0002, 0x3428: 0x0002, 0x3429: 0x0002, 0x342a: 0x0002, 0x342b: 0x0002, 0x342c: 0x0002, 0x342d: 0x0002, 0x342e: 0x0002, 0x342f: 0x0002, 0x3430: 0x0002, 0x3431: 0x0002, 0x3432: 0x0002, 0x3433: 0x0002, 0x3434: 0x0002, 0x3435: 0x0002, 0x3436: 0x0002, // Block 0xd1, offset 0x3440 0x3440: 0x0001, 0x3441: 0x0001, 0x3442: 0x0001, 0x3443: 0x0001, 0x3444: 0x0001, 0x3445: 0x0001, 0x3446: 0x0001, 0x3447: 0x0001, 0x3448: 0x0001, 0x3449: 0x0001, 0x344a: 0x0001, 0x344b: 0x0001, 0x344c: 0x0001, 0x344d: 0x0001, 0x344e: 0x0001, 0x344f: 0x0001, 0x3450: 0x0001, 0x3451: 0x0001, 0x3452: 0x0001, 0x3453: 0x0001, 0x3454: 0x0001, 0x3455: 0x0001, 0x3456: 0x0001, 0x3457: 0x0001, 0x3458: 0x0001, 0x3459: 0x0001, 0x345a: 0x0001, 0x345b: 0x0001, 0x345c: 0x0001, 0x345d: 0x0001, 0x345e: 0x0001, 0x345f: 0x0001, 0x3460: 0x0001, 0x3461: 0x0001, 0x3462: 0x0001, 0x3463: 0x0001, 0x3464: 0x0001, 0x3465: 0x0001, 0x3466: 0x0001, 0x3467: 0x0001, 
0x3468: 0x0001, 0x3469: 0x0001, 0x346a: 0x0001, 0x346b: 0x0001, 0x346c: 0x0001, 0x346d: 0x0001, 0x346e: 0x0001, 0x346f: 0x0001, 0x3470: 0x0001, 0x3471: 0x0001, 0x3472: 0x0001, 0x3473: 0x0001, 0x3474: 0x0001, 0x3475: 0x0001, 0x3476: 0x0001, 0x347b: 0x0001, 0x347c: 0x0001, 0x347d: 0x0001, 0x347e: 0x0001, 0x347f: 0x0001, // Block 0xd2, offset 0x3480 0x3480: 0x0001, 0x3481: 0x0001, 0x3482: 0x0001, 0x3483: 0x0001, 0x3484: 0x0001, 0x3485: 0x0001, 0x3486: 0x0001, 0x3487: 0x0001, 0x3488: 0x0001, 0x3489: 0x0001, 0x348a: 0x0001, 0x348b: 0x0001, 0x348c: 0x0001, 0x348d: 0x0001, 0x348e: 0x0001, 0x348f: 0x0001, 0x3490: 0x0001, 0x3491: 0x0001, 0x3492: 0x0001, 0x3493: 0x0001, 0x3494: 0x0001, 0x3495: 0x0001, 0x3496: 0x0001, 0x3497: 0x0001, 0x3498: 0x0001, 0x3499: 0x0001, 0x349a: 0x0001, 0x349b: 0x0001, 0x349c: 0x0001, 0x349d: 0x0001, 0x349e: 0x0001, 0x349f: 0x0001, 0x34a0: 0x0001, 0x34a1: 0x0001, 0x34a2: 0x0001, 0x34a3: 0x0001, 0x34a4: 0x0001, 0x34a5: 0x0001, 0x34a6: 0x0001, 0x34a7: 0x0001, 0x34a8: 0x0001, 0x34a9: 0x0001, 0x34aa: 0x0001, 0x34ab: 0x0001, 0x34ac: 0x0001, 0x34b5: 0x0001, // Block 0xd3, offset 0x34c0 0x34c4: 0x0001, 0x34db: 0x0001, 0x34dc: 0x0001, 0x34dd: 0x0001, 0x34de: 0x0001, 0x34df: 0x0001, 0x34e1: 0x0001, 0x34e2: 0x0001, 0x34e3: 0x0001, 0x34e4: 0x0001, 0x34e5: 0x0001, 0x34e6: 0x0001, 0x34e7: 0x0001, 0x34e8: 0x0001, 0x34e9: 0x0001, 0x34ea: 0x0001, 0x34eb: 0x0001, 0x34ec: 0x0001, 0x34ed: 0x0001, 0x34ee: 0x0001, 0x34ef: 0x0001, // Block 0xd4, offset 0x3500 0x3500: 0x0001, 0x3501: 0x0001, 0x3502: 0x0001, 0x3503: 0x0001, 0x3504: 0x0001, 0x3505: 0x0001, 0x3506: 0x0001, 0x3508: 0x0001, 0x3509: 0x0001, 0x350a: 0x0001, 0x350b: 0x0001, 0x350c: 0x0001, 0x350d: 0x0001, 0x350e: 0x0001, 0x350f: 0x0001, 0x3510: 0x0001, 0x3511: 0x0001, 0x3512: 0x0001, 0x3513: 0x0001, 0x3514: 0x0001, 0x3515: 0x0001, 0x3516: 0x0001, 0x3517: 0x0001, 0x3518: 0x0001, 0x351b: 0x0001, 0x351c: 0x0001, 0x351d: 0x0001, 0x351e: 0x0001, 0x351f: 0x0001, 0x3520: 0x0001, 0x3521: 0x0001, 0x3523: 0x0001, 0x3524: 
0x0001, 0x3526: 0x0001, 0x3527: 0x0001, 0x3528: 0x0001, 0x3529: 0x0001, 0x352a: 0x0001, // Block 0xd5, offset 0x3540 0x356e: 0x0001, // Block 0xd6, offset 0x3580 0x35ac: 0x0001, 0x35ad: 0x0001, 0x35ae: 0x0001, 0x35af: 0x0001, // Block 0xd7, offset 0x35c0 0x35d0: 0x0001, 0x35d1: 0x0001, 0x35d2: 0x0001, 0x35d3: 0x0001, 0x35d4: 0x0001, 0x35d5: 0x0001, 0x35d6: 0x0001, // Block 0xd8, offset 0x3600 0x3604: 0x0001, 0x3605: 0x0001, 0x3606: 0x0001, 0x3607: 0x0001, 0x3608: 0x0001, 0x3609: 0x0001, 0x360a: 0x0001, // Block 0xd9, offset 0x3640 0x3644: 0x0002, // Block 0xda, offset 0x3680 0x368f: 0x0002, // Block 0xdb, offset 0x36c0 0x36c0: 0x0003, 0x36c1: 0x0003, 0x36c2: 0x0003, 0x36c3: 0x0003, 0x36c4: 0x0003, 0x36c5: 0x0003, 0x36c6: 0x0003, 0x36c7: 0x0003, 0x36c8: 0x0003, 0x36c9: 0x0003, 0x36ca: 0x0003, 0x36d0: 0x0003, 0x36d1: 0x0003, 0x36d2: 0x0003, 0x36d3: 0x0003, 0x36d4: 0x0003, 0x36d5: 0x0003, 0x36d6: 0x0003, 0x36d7: 0x0003, 0x36d8: 0x0003, 0x36d9: 0x0003, 0x36da: 0x0003, 0x36db: 0x0003, 0x36dc: 0x0003, 0x36dd: 0x0003, 0x36de: 0x0003, 0x36df: 0x0003, 0x36e0: 0x0003, 0x36e1: 0x0003, 0x36e2: 0x0003, 0x36e3: 0x0003, 0x36e4: 0x0003, 0x36e5: 0x0003, 0x36e6: 0x0003, 0x36e7: 0x0003, 0x36e8: 0x0003, 0x36e9: 0x0003, 0x36ea: 0x0003, 0x36eb: 0x0003, 0x36ec: 0x0003, 0x36ed: 0x0003, 0x36f0: 0x0003, 0x36f1: 0x0003, 0x36f2: 0x0003, 0x36f3: 0x0003, 0x36f4: 0x0003, 0x36f5: 0x0003, 0x36f6: 0x0003, 0x36f7: 0x0003, 0x36f8: 0x0003, 0x36f9: 0x0003, 0x36fa: 0x0003, 0x36fb: 0x0003, 0x36fc: 0x0003, 0x36fd: 0x0003, 0x36fe: 0x0003, 0x36ff: 0x0003, // Block 0xdc, offset 0x3700 0x3700: 0x0003, 0x3701: 0x0003, 0x3702: 0x0003, 0x3703: 0x0003, 0x3704: 0x0003, 0x3705: 0x0003, 0x3706: 0x0003, 0x3707: 0x0003, 0x3708: 0x0003, 0x3709: 0x0003, 0x370a: 0x0003, 0x370b: 0x0003, 0x370c: 0x0003, 0x370d: 0x0003, 0x370e: 0x0003, 0x370f: 0x0003, 0x3710: 0x0003, 0x3711: 0x0003, 0x3712: 0x0003, 0x3713: 0x0003, 0x3714: 0x0003, 0x3715: 0x0003, 0x3716: 0x0003, 0x3717: 0x0003, 0x3718: 0x0003, 0x3719: 0x0003, 0x371a: 0x0003, 
0x371b: 0x0003, 0x371c: 0x0003, 0x371d: 0x0003, 0x371e: 0x0003, 0x371f: 0x0003, 0x3720: 0x0003, 0x3721: 0x0003, 0x3722: 0x0003, 0x3723: 0x0003, 0x3724: 0x0003, 0x3725: 0x0003, 0x3726: 0x0003, 0x3727: 0x0003, 0x3728: 0x0003, 0x3729: 0x0003, 0x3730: 0x0003, 0x3731: 0x0003, 0x3732: 0x0003, 0x3733: 0x0003, 0x3734: 0x0003, 0x3735: 0x0003, 0x3736: 0x0003, 0x3737: 0x0003, 0x3738: 0x0003, 0x3739: 0x0003, 0x373a: 0x0003, 0x373b: 0x0003, 0x373c: 0x0003, 0x373d: 0x0003, 0x373e: 0x0003, 0x373f: 0x0003, // Block 0xdd, offset 0x3740 0x3740: 0x0003, 0x3741: 0x0003, 0x3742: 0x0003, 0x3743: 0x0003, 0x3744: 0x0003, 0x3745: 0x0003, 0x3746: 0x0003, 0x3747: 0x0003, 0x3748: 0x0003, 0x3749: 0x0003, 0x374a: 0x0003, 0x374b: 0x0003, 0x374c: 0x0003, 0x374d: 0x0003, 0x374e: 0x0002, 0x374f: 0x0003, 0x3750: 0x0003, 0x3751: 0x0002, 0x3752: 0x0002, 0x3753: 0x0002, 0x3754: 0x0002, 0x3755: 0x0002, 0x3756: 0x0002, 0x3757: 0x0002, 0x3758: 0x0002, 0x3759: 0x0002, 0x375a: 0x0002, 0x375b: 0x0003, 0x375c: 0x0003, 0x375d: 0x0003, 0x375e: 0x0003, 0x375f: 0x0003, 0x3760: 0x0003, 0x3761: 0x0003, 0x3762: 0x0003, 0x3763: 0x0003, 0x3764: 0x0003, 0x3765: 0x0003, 0x3766: 0x0003, 0x3767: 0x0003, 0x3768: 0x0003, 0x3769: 0x0003, 0x376a: 0x0003, 0x376b: 0x0003, 0x376c: 0x0003, // Block 0xde, offset 0x3780 0x37a6: 0x0002, 0x37a7: 0x0002, 0x37a8: 0x0002, 0x37a9: 0x0002, 0x37aa: 0x0002, 0x37ab: 0x0002, 0x37ac: 0x0002, 0x37ad: 0x0002, 0x37ae: 0x0002, 0x37af: 0x0002, 0x37b0: 0x0002, 0x37b1: 0x0002, 0x37b2: 0x0002, 0x37b3: 0x0002, 0x37b4: 0x0002, 0x37b5: 0x0002, 0x37b6: 0x0002, 0x37b7: 0x0002, 0x37b8: 0x0002, 0x37b9: 0x0002, 0x37ba: 0x0002, 0x37bb: 0x0002, 0x37bc: 0x0002, 0x37bd: 0x0002, 0x37be: 0x0002, 0x37bf: 0x0002, // Block 0xdf, offset 0x37c0 0x37c0: 0x0002, 0x37c1: 0x0002, 0x37c2: 0x0002, 0x37d0: 0x0002, 0x37d1: 0x0002, 0x37d2: 0x0002, 0x37d3: 0x0002, 0x37d4: 0x0002, 0x37d5: 0x0002, 0x37d6: 0x0002, 0x37d7: 0x0002, 0x37d8: 0x0002, 0x37d9: 0x0002, 0x37da: 0x0002, 0x37db: 0x0002, 0x37dc: 0x0002, 0x37dd: 0x0002, 0x37de: 
0x0002, 0x37df: 0x0002, 0x37e0: 0x0002, 0x37e1: 0x0002, 0x37e2: 0x0002, 0x37e3: 0x0002, 0x37e4: 0x0002, 0x37e5: 0x0002, 0x37e6: 0x0002, 0x37e7: 0x0002, 0x37e8: 0x0002, 0x37e9: 0x0002, 0x37ea: 0x0002, 0x37eb: 0x0002, 0x37ec: 0x0002, 0x37ed: 0x0002, 0x37ee: 0x0002, 0x37ef: 0x0002, 0x37f0: 0x0002, 0x37f1: 0x0002, 0x37f2: 0x0002, 0x37f3: 0x0002, 0x37f4: 0x0002, 0x37f5: 0x0002, 0x37f6: 0x0002, 0x37f7: 0x0002, 0x37f8: 0x0002, 0x37f9: 0x0002, 0x37fa: 0x0002, 0x37fb: 0x0002, // Block 0xe0, offset 0x3800 0x3800: 0x0002, 0x3801: 0x0002, 0x3802: 0x0002, 0x3803: 0x0002, 0x3804: 0x0002, 0x3805: 0x0002, 0x3806: 0x0002, 0x3807: 0x0002, 0x3808: 0x0002, 0x3810: 0x0002, 0x3811: 0x0002, 0x3820: 0x0002, 0x3821: 0x0002, 0x3822: 0x0002, 0x3823: 0x0002, 0x3824: 0x0002, 0x3825: 0x0002, // Block 0xe1, offset 0x3840 0x3840: 0x0002, 0x3841: 0x0002, 0x3842: 0x0002, 0x3843: 0x0002, 0x3844: 0x0002, 0x3845: 0x0002, 0x3846: 0x0002, 0x3847: 0x0002, 0x3848: 0x0002, 0x3849: 0x0002, 0x384a: 0x0002, 0x384b: 0x0002, 0x384c: 0x0002, 0x384d: 0x0002, 0x384e: 0x0002, 0x384f: 0x0002, 0x3850: 0x0002, 0x3851: 0x0002, 0x3852: 0x0002, 0x3853: 0x0002, 0x3854: 0x0002, 0x3855: 0x0002, 0x3856: 0x0002, 0x3857: 0x0002, 0x3858: 0x0002, 0x3859: 0x0002, 0x385a: 0x0002, 0x385b: 0x0002, 0x385c: 0x0002, 0x385d: 0x0002, 0x385e: 0x0002, 0x385f: 0x0002, 0x3860: 0x0002, 0x386d: 0x0002, 0x386e: 0x0002, 0x386f: 0x0002, 0x3870: 0x0002, 0x3871: 0x0002, 0x3872: 0x0002, 0x3873: 0x0002, 0x3874: 0x0002, 0x3875: 0x0002, 0x3877: 0x0002, 0x3878: 0x0002, 0x3879: 0x0002, 0x387a: 0x0002, 0x387b: 0x0002, 0x387c: 0x0002, 0x387d: 0x0002, 0x387e: 0x0002, 0x387f: 0x0002, // Block 0xe2, offset 0x3880 0x3880: 0x0002, 0x3881: 0x0002, 0x3882: 0x0002, 0x3883: 0x0002, 0x3884: 0x0002, 0x3885: 0x0002, 0x3886: 0x0002, 0x3887: 0x0002, 0x3888: 0x0002, 0x3889: 0x0002, 0x388a: 0x0002, 0x388b: 0x0002, 0x388c: 0x0002, 0x388d: 0x0002, 0x388e: 0x0002, 0x388f: 0x0002, 0x3890: 0x0002, 0x3891: 0x0002, 0x3892: 0x0002, 0x3893: 0x0002, 0x3894: 0x0002, 0x3895: 0x0002, 
0x3896: 0x0002, 0x3897: 0x0002, 0x3898: 0x0002, 0x3899: 0x0002, 0x389a: 0x0002, 0x389b: 0x0002, 0x389c: 0x0002, 0x389d: 0x0002, 0x389e: 0x0002, 0x389f: 0x0002, 0x38a0: 0x0002, 0x38a1: 0x0002, 0x38a2: 0x0002, 0x38a3: 0x0002, 0x38a4: 0x0002, 0x38a5: 0x0002, 0x38a6: 0x0002, 0x38a7: 0x0002, 0x38a8: 0x0002, 0x38a9: 0x0002, 0x38aa: 0x0002, 0x38ab: 0x0002, 0x38ac: 0x0002, 0x38ad: 0x0002, 0x38ae: 0x0002, 0x38af: 0x0002, 0x38b0: 0x0002, 0x38b1: 0x0002, 0x38b2: 0x0002, 0x38b3: 0x0002, 0x38b4: 0x0002, 0x38b5: 0x0002, 0x38b6: 0x0002, 0x38b7: 0x0002, 0x38b8: 0x0002, 0x38b9: 0x0002, 0x38ba: 0x0002, 0x38bb: 0x0002, 0x38bc: 0x0002, 0x38be: 0x0002, 0x38bf: 0x0002, // Block 0xe3, offset 0x38c0 0x38c0: 0x0002, 0x38c1: 0x0002, 0x38c2: 0x0002, 0x38c3: 0x0002, 0x38c4: 0x0002, 0x38c5: 0x0002, 0x38c6: 0x0002, 0x38c7: 0x0002, 0x38c8: 0x0002, 0x38c9: 0x0002, 0x38ca: 0x0002, 0x38cb: 0x0002, 0x38cc: 0x0002, 0x38cd: 0x0002, 0x38ce: 0x0002, 0x38cf: 0x0002, 0x38d0: 0x0002, 0x38d1: 0x0002, 0x38d2: 0x0002, 0x38d3: 0x0002, 0x38e0: 0x0002, 0x38e1: 0x0002, 0x38e2: 0x0002, 0x38e3: 0x0002, 0x38e4: 0x0002, 0x38e5: 0x0002, 0x38e6: 0x0002, 0x38e7: 0x0002, 0x38e8: 0x0002, 0x38e9: 0x0002, 0x38ea: 0x0002, 0x38eb: 0x0002, 0x38ec: 0x0002, 0x38ed: 0x0002, 0x38ee: 0x0002, 0x38ef: 0x0002, 0x38f0: 0x0002, 0x38f1: 0x0002, 0x38f2: 0x0002, 0x38f3: 0x0002, 0x38f4: 0x0002, 0x38f5: 0x0002, 0x38f6: 0x0002, 0x38f7: 0x0002, 0x38f8: 0x0002, 0x38f9: 0x0002, 0x38fa: 0x0002, 0x38fb: 0x0002, 0x38fc: 0x0002, 0x38fd: 0x0002, 0x38fe: 0x0002, 0x38ff: 0x0002, // Block 0xe4, offset 0x3900 0x3900: 0x0002, 0x3901: 0x0002, 0x3902: 0x0002, 0x3903: 0x0002, 0x3904: 0x0002, 0x3905: 0x0002, 0x3906: 0x0002, 0x3907: 0x0002, 0x3908: 0x0002, 0x3909: 0x0002, 0x390a: 0x0002, 0x390f: 0x0002, 0x3910: 0x0002, 0x3911: 0x0002, 0x3912: 0x0002, 0x3913: 0x0002, 0x3920: 0x0002, 0x3921: 0x0002, 0x3922: 0x0002, 0x3923: 0x0002, 0x3924: 0x0002, 0x3925: 0x0002, 0x3926: 0x0002, 0x3927: 0x0002, 0x3928: 0x0002, 0x3929: 0x0002, 0x392a: 0x0002, 0x392b: 0x0002, 
0x392c: 0x0002, 0x392d: 0x0002, 0x392e: 0x0002, 0x392f: 0x0002, 0x3930: 0x0002, 0x3934: 0x0002, 0x3938: 0x0002, 0x3939: 0x0002, 0x393a: 0x0002, 0x393b: 0x0002, 0x393c: 0x0002, 0x393d: 0x0002, 0x393e: 0x0002, 0x393f: 0x0002, // Block 0xe5, offset 0x3940 0x3940: 0x0002, 0x3941: 0x0002, 0x3942: 0x0002, 0x3943: 0x0002, 0x3944: 0x0002, 0x3945: 0x0002, 0x3946: 0x0002, 0x3947: 0x0002, 0x3948: 0x0002, 0x3949: 0x0002, 0x394a: 0x0002, 0x394b: 0x0002, 0x394c: 0x0002, 0x394d: 0x0002, 0x394e: 0x0002, 0x394f: 0x0002, 0x3950: 0x0002, 0x3951: 0x0002, 0x3952: 0x0002, 0x3953: 0x0002, 0x3954: 0x0002, 0x3955: 0x0002, 0x3956: 0x0002, 0x3957: 0x0002, 0x3958: 0x0002, 0x3959: 0x0002, 0x395a: 0x0002, 0x395b: 0x0002, 0x395c: 0x0002, 0x395d: 0x0002, 0x395e: 0x0002, 0x395f: 0x0002, 0x3960: 0x0002, 0x3961: 0x0002, 0x3962: 0x0002, 0x3963: 0x0002, 0x3964: 0x0002, 0x3965: 0x0002, 0x3966: 0x0002, 0x3967: 0x0002, 0x3968: 0x0002, 0x3969: 0x0002, 0x396a: 0x0002, 0x396b: 0x0002, 0x396c: 0x0002, 0x396d: 0x0002, 0x396e: 0x0002, 0x396f: 0x0002, 0x3970: 0x0002, 0x3971: 0x0002, 0x3972: 0x0002, 0x3973: 0x0002, 0x3974: 0x0002, 0x3975: 0x0002, 0x3976: 0x0002, 0x3977: 0x0002, 0x3978: 0x0002, 0x3979: 0x0002, 0x397a: 0x0002, 0x397b: 0x0002, 0x397c: 0x0002, 0x397d: 0x0002, 0x397e: 0x0002, // Block 0xe6, offset 0x3980 0x3980: 0x0002, 0x3982: 0x0002, 0x3983: 0x0002, 0x3984: 0x0002, 0x3985: 0x0002, 0x3986: 0x0002, 0x3987: 0x0002, 0x3988: 0x0002, 0x3989: 0x0002, 0x398a: 0x0002, 0x398b: 0x0002, 0x398c: 0x0002, 0x398d: 0x0002, 0x398e: 0x0002, 0x398f: 0x0002, 0x3990: 0x0002, 0x3991: 0x0002, 0x3992: 0x0002, 0x3993: 0x0002, 0x3994: 0x0002, 0x3995: 0x0002, 0x3996: 0x0002, 0x3997: 0x0002, 0x3998: 0x0002, 0x3999: 0x0002, 0x399a: 0x0002, 0x399b: 0x0002, 0x399c: 0x0002, 0x399d: 0x0002, 0x399e: 0x0002, 0x399f: 0x0002, 0x39a0: 0x0002, 0x39a1: 0x0002, 0x39a2: 0x0002, 0x39a3: 0x0002, 0x39a4: 0x0002, 0x39a5: 0x0002, 0x39a6: 0x0002, 0x39a7: 0x0002, 0x39a8: 0x0002, 0x39a9: 0x0002, 0x39aa: 0x0002, 0x39ab: 0x0002, 0x39ac: 0x0002, 
0x39ad: 0x0002, 0x39ae: 0x0002, 0x39af: 0x0002, 0x39b0: 0x0002, 0x39b1: 0x0002, 0x39b2: 0x0002, 0x39b3: 0x0002, 0x39b4: 0x0002, 0x39b5: 0x0002, 0x39b6: 0x0002, 0x39b7: 0x0002, 0x39b8: 0x0002, 0x39b9: 0x0002, 0x39ba: 0x0002, 0x39bb: 0x0002, 0x39bc: 0x0002, 0x39bd: 0x0002, 0x39be: 0x0002, 0x39bf: 0x0002, // Block 0xe7, offset 0x39c0 0x39c0: 0x0002, 0x39c1: 0x0002, 0x39c2: 0x0002, 0x39c3: 0x0002, 0x39c4: 0x0002, 0x39c5: 0x0002, 0x39c6: 0x0002, 0x39c7: 0x0002, 0x39c8: 0x0002, 0x39c9: 0x0002, 0x39ca: 0x0002, 0x39cb: 0x0002, 0x39cc: 0x0002, 0x39cd: 0x0002, 0x39ce: 0x0002, 0x39cf: 0x0002, 0x39d0: 0x0002, 0x39d1: 0x0002, 0x39d2: 0x0002, 0x39d3: 0x0002, 0x39d4: 0x0002, 0x39d5: 0x0002, 0x39d6: 0x0002, 0x39d7: 0x0002, 0x39d8: 0x0002, 0x39d9: 0x0002, 0x39da: 0x0002, 0x39db: 0x0002, 0x39dc: 0x0002, 0x39dd: 0x0002, 0x39de: 0x0002, 0x39df: 0x0002, 0x39e0: 0x0002, 0x39e1: 0x0002, 0x39e2: 0x0002, 0x39e3: 0x0002, 0x39e4: 0x0002, 0x39e5: 0x0002, 0x39e6: 0x0002, 0x39e7: 0x0002, 0x39e8: 0x0002, 0x39e9: 0x0002, 0x39ea: 0x0002, 0x39eb: 0x0002, 0x39ec: 0x0002, 0x39ed: 0x0002, 0x39ee: 0x0002, 0x39ef: 0x0002, 0x39f0: 0x0002, 0x39f1: 0x0002, 0x39f2: 0x0002, 0x39f3: 0x0002, 0x39f4: 0x0002, 0x39f5: 0x0002, 0x39f6: 0x0002, 0x39f7: 0x0002, 0x39f8: 0x0002, 0x39f9: 0x0002, 0x39fa: 0x0002, 0x39fb: 0x0002, 0x39fc: 0x0002, 0x39ff: 0x0002, // Block 0xe8, offset 0x3a00 0x3a00: 0x0002, 0x3a01: 0x0002, 0x3a02: 0x0002, 0x3a03: 0x0002, 0x3a04: 0x0002, 0x3a05: 0x0002, 0x3a06: 0x0002, 0x3a07: 0x0002, 0x3a08: 0x0002, 0x3a09: 0x0002, 0x3a0a: 0x0002, 0x3a0b: 0x0002, 0x3a0c: 0x0002, 0x3a0d: 0x0002, 0x3a0e: 0x0002, 0x3a0f: 0x0002, 0x3a10: 0x0002, 0x3a11: 0x0002, 0x3a12: 0x0002, 0x3a13: 0x0002, 0x3a14: 0x0002, 0x3a15: 0x0002, 0x3a16: 0x0002, 0x3a17: 0x0002, 0x3a18: 0x0002, 0x3a19: 0x0002, 0x3a1a: 0x0002, 0x3a1b: 0x0002, 0x3a1c: 0x0002, 0x3a1d: 0x0002, 0x3a1e: 0x0002, 0x3a1f: 0x0002, 0x3a20: 0x0002, 0x3a21: 0x0002, 0x3a22: 0x0002, 0x3a23: 0x0002, 0x3a24: 0x0002, 0x3a25: 0x0002, 0x3a26: 0x0002, 0x3a27: 0x0002, 
0x3a28: 0x0002, 0x3a29: 0x0002, 0x3a2a: 0x0002, 0x3a2b: 0x0002, 0x3a2c: 0x0002, 0x3a2d: 0x0002, 0x3a2e: 0x0002, 0x3a2f: 0x0002, 0x3a30: 0x0002, 0x3a31: 0x0002, 0x3a32: 0x0002, 0x3a33: 0x0002, 0x3a34: 0x0002, 0x3a35: 0x0002, 0x3a36: 0x0002, 0x3a37: 0x0002, 0x3a38: 0x0002, 0x3a39: 0x0002, 0x3a3a: 0x0002, 0x3a3b: 0x0002, 0x3a3c: 0x0002, 0x3a3d: 0x0002, // Block 0xe9, offset 0x3a40 0x3a4b: 0x0002, 0x3a4c: 0x0002, 0x3a4d: 0x0002, 0x3a4e: 0x0002, 0x3a50: 0x0002, 0x3a51: 0x0002, 0x3a52: 0x0002, 0x3a53: 0x0002, 0x3a54: 0x0002, 0x3a55: 0x0002, 0x3a56: 0x0002, 0x3a57: 0x0002, 0x3a58: 0x0002, 0x3a59: 0x0002, 0x3a5a: 0x0002, 0x3a5b: 0x0002, 0x3a5c: 0x0002, 0x3a5d: 0x0002, 0x3a5e: 0x0002, 0x3a5f: 0x0002, 0x3a60: 0x0002, 0x3a61: 0x0002, 0x3a62: 0x0002, 0x3a63: 0x0002, 0x3a64: 0x0002, 0x3a65: 0x0002, 0x3a66: 0x0002, 0x3a67: 0x0002, 0x3a7a: 0x0002, // Block 0xea, offset 0x3a80 0x3a95: 0x0002, 0x3a96: 0x0002, 0x3aa4: 0x0002, // Block 0xeb, offset 0x3ac0 0x3afb: 0x0002, 0x3afc: 0x0002, 0x3afd: 0x0002, 0x3afe: 0x0002, 0x3aff: 0x0002, // Block 0xec, offset 0x3b00 0x3b00: 0x0002, 0x3b01: 0x0002, 0x3b02: 0x0002, 0x3b03: 0x0002, 0x3b04: 0x0002, 0x3b05: 0x0002, 0x3b06: 0x0002, 0x3b07: 0x0002, 0x3b08: 0x0002, 0x3b09: 0x0002, 0x3b0a: 0x0002, 0x3b0b: 0x0002, 0x3b0c: 0x0002, 0x3b0d: 0x0002, 0x3b0e: 0x0002, 0x3b0f: 0x0002, // Block 0xed, offset 0x3b40 0x3b40: 0x0002, 0x3b41: 0x0002, 0x3b42: 0x0002, 0x3b43: 0x0002, 0x3b44: 0x0002, 0x3b45: 0x0002, 0x3b4c: 0x0002, 0x3b50: 0x0002, 0x3b51: 0x0002, 0x3b52: 0x0002, 0x3b55: 0x0002, 0x3b56: 0x0002, 0x3b57: 0x0002, 0x3b58: 0x0002, 0x3b5c: 0x0002, 0x3b5d: 0x0002, 0x3b5e: 0x0002, 0x3b5f: 0x0002, 0x3b6b: 0x0002, 0x3b6c: 0x0002, 0x3b74: 0x0002, 0x3b75: 0x0002, 0x3b76: 0x0002, 0x3b77: 0x0002, 0x3b78: 0x0002, 0x3b79: 0x0002, 0x3b7a: 0x0002, 0x3b7b: 0x0002, 0x3b7c: 0x0002, // Block 0xee, offset 0x3b80 0x3ba0: 0x0002, 0x3ba1: 0x0002, 0x3ba2: 0x0002, 0x3ba3: 0x0002, 0x3ba4: 0x0002, 0x3ba5: 0x0002, 0x3ba6: 0x0002, 0x3ba7: 0x0002, 0x3ba8: 0x0002, 0x3ba9: 0x0002, 
0x3baa: 0x0002, 0x3bab: 0x0002, 0x3bb0: 0x0002, // Block 0xef, offset 0x3bc0 0x3bcc: 0x0002, 0x3bcd: 0x0002, 0x3bce: 0x0002, 0x3bcf: 0x0002, 0x3bd0: 0x0002, 0x3bd1: 0x0002, 0x3bd2: 0x0002, 0x3bd3: 0x0002, 0x3bd4: 0x0002, 0x3bd5: 0x0002, 0x3bd6: 0x0002, 0x3bd7: 0x0002, 0x3bd8: 0x0002, 0x3bd9: 0x0002, 0x3bda: 0x0002, 0x3bdb: 0x0002, 0x3bdc: 0x0002, 0x3bdd: 0x0002, 0x3bde: 0x0002, 0x3bdf: 0x0002, 0x3be0: 0x0002, 0x3be1: 0x0002, 0x3be2: 0x0002, 0x3be3: 0x0002, 0x3be4: 0x0002, 0x3be5: 0x0002, 0x3be6: 0x0002, 0x3be7: 0x0002, 0x3be8: 0x0002, 0x3be9: 0x0002, 0x3bea: 0x0002, 0x3beb: 0x0002, 0x3bec: 0x0002, 0x3bed: 0x0002, 0x3bee: 0x0002, 0x3bef: 0x0002, 0x3bf0: 0x0002, 0x3bf1: 0x0002, 0x3bf2: 0x0002, 0x3bf3: 0x0002, 0x3bf4: 0x0002, 0x3bf5: 0x0002, 0x3bf6: 0x0002, 0x3bf7: 0x0002, 0x3bf8: 0x0002, 0x3bf9: 0x0002, 0x3bfa: 0x0002, 0x3bfc: 0x0002, 0x3bfd: 0x0002, 0x3bfe: 0x0002, 0x3bff: 0x0002, // Block 0xf0, offset 0x3c00 0x3c00: 0x0002, 0x3c01: 0x0002, 0x3c02: 0x0002, 0x3c03: 0x0002, 0x3c04: 0x0002, 0x3c05: 0x0002, 0x3c07: 0x0002, 0x3c08: 0x0002, 0x3c09: 0x0002, 0x3c0a: 0x0002, 0x3c0b: 0x0002, 0x3c0c: 0x0002, 0x3c0d: 0x0002, 0x3c0e: 0x0002, 0x3c0f: 0x0002, 0x3c10: 0x0002, 0x3c11: 0x0002, 0x3c12: 0x0002, 0x3c13: 0x0002, 0x3c14: 0x0002, 0x3c15: 0x0002, 0x3c16: 0x0002, 0x3c17: 0x0002, 0x3c18: 0x0002, 0x3c19: 0x0002, 0x3c1a: 0x0002, 0x3c1b: 0x0002, 0x3c1c: 0x0002, 0x3c1d: 0x0002, 0x3c1e: 0x0002, 0x3c1f: 0x0002, 0x3c20: 0x0002, 0x3c21: 0x0002, 0x3c22: 0x0002, 0x3c23: 0x0002, 0x3c24: 0x0002, 0x3c25: 0x0002, 0x3c26: 0x0002, 0x3c27: 0x0002, 0x3c28: 0x0002, 0x3c29: 0x0002, 0x3c2a: 0x0002, 0x3c2b: 0x0002, 0x3c2c: 0x0002, 0x3c2d: 0x0002, 0x3c2e: 0x0002, 0x3c2f: 0x0002, 0x3c30: 0x0002, 0x3c31: 0x0002, 0x3c32: 0x0002, 0x3c33: 0x0002, 0x3c34: 0x0002, 0x3c35: 0x0002, 0x3c36: 0x0002, 0x3c37: 0x0002, 0x3c38: 0x0002, 0x3c39: 0x0002, 0x3c3a: 0x0002, 0x3c3b: 0x0002, 0x3c3c: 0x0002, 0x3c3d: 0x0002, 0x3c3e: 0x0002, 0x3c3f: 0x0002, // Block 0xf1, offset 0x3c40 0x3c70: 0x0002, 0x3c71: 0x0002, 0x3c72: 
0x0002, 0x3c73: 0x0002, 0x3c74: 0x0002, 0x3c75: 0x0002, 0x3c76: 0x0002, 0x3c77: 0x0002, 0x3c78: 0x0002, 0x3c79: 0x0002, 0x3c7a: 0x0002, 0x3c7b: 0x0002, 0x3c7c: 0x0002, // Block 0xf2, offset 0x3c80 0x3c80: 0x0002, 0x3c81: 0x0002, 0x3c82: 0x0002, 0x3c83: 0x0002, 0x3c84: 0x0002, 0x3c85: 0x0002, 0x3c86: 0x0002, 0x3c87: 0x0002, 0x3c88: 0x0002, 0x3c89: 0x0002, 0x3c8a: 0x0002, 0x3c8e: 0x0002, 0x3c8f: 0x0002, 0x3c90: 0x0002, 0x3c91: 0x0002, 0x3c92: 0x0002, 0x3c93: 0x0002, 0x3c94: 0x0002, 0x3c95: 0x0002, 0x3c96: 0x0002, 0x3c97: 0x0002, 0x3c98: 0x0002, 0x3c99: 0x0002, 0x3c9a: 0x0002, 0x3c9b: 0x0002, 0x3c9c: 0x0002, 0x3c9d: 0x0002, 0x3c9e: 0x0002, 0x3c9f: 0x0002, 0x3ca0: 0x0002, 0x3ca1: 0x0002, 0x3ca2: 0x0002, 0x3ca3: 0x0002, 0x3ca4: 0x0002, 0x3ca5: 0x0002, 0x3ca6: 0x0002, 0x3ca7: 0x0002, 0x3ca8: 0x0002, 0x3ca9: 0x0002, 0x3caa: 0x0002, 0x3cab: 0x0002, 0x3cac: 0x0002, 0x3cad: 0x0002, 0x3cae: 0x0002, 0x3caf: 0x0002, 0x3cb0: 0x0002, 0x3cb1: 0x0002, 0x3cb2: 0x0002, 0x3cb3: 0x0002, 0x3cb4: 0x0002, 0x3cb5: 0x0002, 0x3cb6: 0x0002, 0x3cb7: 0x0002, 0x3cb8: 0x0002, 0x3cb9: 0x0002, 0x3cba: 0x0002, 0x3cbb: 0x0002, 0x3cbc: 0x0002, 0x3cbd: 0x0002, 0x3cbe: 0x0002, 0x3cbf: 0x0002, // Block 0xf3, offset 0x3cc0 0x3cc0: 0x0002, 0x3cc1: 0x0002, 0x3cc2: 0x0002, 0x3cc3: 0x0002, 0x3cc4: 0x0002, 0x3cc5: 0x0002, 0x3cc6: 0x0002, 0x3cc8: 0x0002, 0x3ccd: 0x0002, 0x3cce: 0x0002, 0x3ccf: 0x0002, 0x3cd0: 0x0002, 0x3cd1: 0x0002, 0x3cd2: 0x0002, 0x3cd3: 0x0002, 0x3cd4: 0x0002, 0x3cd5: 0x0002, 0x3cd6: 0x0002, 0x3cd7: 0x0002, 0x3cd8: 0x0002, 0x3cd9: 0x0002, 0x3cda: 0x0002, 0x3cdb: 0x0002, 0x3cdc: 0x0002, 0x3cdf: 0x0002, 0x3ce0: 0x0002, 0x3ce1: 0x0002, 0x3ce2: 0x0002, 0x3ce3: 0x0002, 0x3ce4: 0x0002, 0x3ce5: 0x0002, 0x3ce6: 0x0002, 0x3ce7: 0x0002, 0x3ce8: 0x0002, 0x3ce9: 0x0002, 0x3cea: 0x0002, 0x3cef: 0x0002, 0x3cf0: 0x0002, 0x3cf1: 0x0002, 0x3cf2: 0x0002, 0x3cf3: 0x0002, 0x3cf4: 0x0002, 0x3cf5: 0x0002, 0x3cf6: 0x0002, 0x3cf7: 0x0002, 0x3cf8: 0x0002, // Block 0xf4, offset 0x3d00 0x3d01: 0x0001, 0x3d20: 0x0001, 
0x3d21: 0x0001, 0x3d22: 0x0001, 0x3d23: 0x0001, 0x3d24: 0x0001, 0x3d25: 0x0001, 0x3d26: 0x0001, 0x3d27: 0x0001, 0x3d28: 0x0001, 0x3d29: 0x0001, 0x3d2a: 0x0001, 0x3d2b: 0x0001, 0x3d2c: 0x0001, 0x3d2d: 0x0001, 0x3d2e: 0x0001, 0x3d2f: 0x0001, 0x3d30: 0x0001, 0x3d31: 0x0001, 0x3d32: 0x0001, 0x3d33: 0x0001, 0x3d34: 0x0001, 0x3d35: 0x0001, 0x3d36: 0x0001, 0x3d37: 0x0001, 0x3d38: 0x0001, 0x3d39: 0x0001, 0x3d3a: 0x0001, 0x3d3b: 0x0001, 0x3d3c: 0x0001, 0x3d3d: 0x0001, 0x3d3e: 0x0001, 0x3d3f: 0x0001, // Block 0xf5, offset 0x3d40 0x3d40: 0x0003, 0x3d41: 0x0003, 0x3d42: 0x0003, 0x3d43: 0x0003, 0x3d44: 0x0003, 0x3d45: 0x0003, 0x3d46: 0x0003, 0x3d47: 0x0003, 0x3d48: 0x0003, 0x3d49: 0x0003, 0x3d4a: 0x0003, 0x3d4b: 0x0003, 0x3d4c: 0x0003, 0x3d4d: 0x0003, 0x3d4e: 0x0003, 0x3d4f: 0x0003, 0x3d50: 0x0003, 0x3d51: 0x0003, 0x3d52: 0x0003, 0x3d53: 0x0003, 0x3d54: 0x0003, 0x3d55: 0x0003, 0x3d56: 0x0003, 0x3d57: 0x0003, 0x3d58: 0x0003, 0x3d59: 0x0003, 0x3d5a: 0x0003, 0x3d5b: 0x0003, 0x3d5c: 0x0003, 0x3d5d: 0x0003, 0x3d5e: 0x0003, 0x3d5f: 0x0003, 0x3d60: 0x0003, 0x3d61: 0x0003, 0x3d62: 0x0003, 0x3d63: 0x0003, 0x3d64: 0x0003, 0x3d65: 0x0003, 0x3d66: 0x0003, 0x3d67: 0x0003, 0x3d68: 0x0003, 0x3d69: 0x0003, 0x3d6a: 0x0003, 0x3d6b: 0x0003, 0x3d6c: 0x0003, 0x3d6d: 0x0003, 0x3d6e: 0x0003, 0x3d6f: 0x0003, 0x3d70: 0x0003, 0x3d71: 0x0003, 0x3d72: 0x0003, 0x3d73: 0x0003, 0x3d74: 0x0003, 0x3d75: 0x0003, 0x3d76: 0x0003, 0x3d77: 0x0003, 0x3d78: 0x0003, 0x3d79: 0x0003, 0x3d7a: 0x0003, 0x3d7b: 0x0003, 0x3d7c: 0x0003, 0x3d7d: 0x0003, } // stringWidthIndex: 30 blocks, 1920 entries, 1920 bytes // Block 0 is the zero block. 
var stringWidthIndex = [1920]uint8{ // Block 0x0, offset 0x0 // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc5: 0x04, 0xc7: 0x05, 0xc9: 0x06, 0xcb: 0x07, 0xcc: 0x08, 0xcd: 0x09, 0xce: 0x0a, 0xcf: 0x0b, 0xd0: 0x0c, 0xd1: 0x0d, 0xd2: 0x0e, 0xd6: 0x0f, 0xd7: 0x10, 0xd8: 0x11, 0xd9: 0x12, 0xdb: 0x13, 0xdc: 0x14, 0xdd: 0x15, 0xde: 0x16, 0xdf: 0x17, 0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe5: 0x06, 0xe6: 0x06, 0xe7: 0x06, 0xe8: 0x06, 0xe9: 0x06, 0xea: 0x07, 0xeb: 0x06, 0xec: 0x06, 0xed: 0x08, 0xee: 0x09, 0xef: 0x0a, 0xf0: 0x17, 0xf3: 0x1a, 0xf4: 0x1b, // Block 0x4, offset 0x100 0x120: 0x18, 0x121: 0x19, 0x122: 0x1a, 0x123: 0x1b, 0x124: 0x1c, 0x125: 0x1d, 0x126: 0x1e, 0x127: 0x1f, 0x128: 0x20, 0x129: 0x21, 0x12a: 0x20, 0x12b: 0x22, 0x12c: 0x23, 0x12d: 0x24, 0x12e: 0x25, 0x12f: 0x26, 0x130: 0x27, 0x131: 0x28, 0x132: 0x23, 0x133: 0x29, 0x134: 0x2a, 0x135: 0x2b, 0x136: 0x2c, 0x137: 0x2d, 0x138: 0x2e, 0x139: 0x2f, 0x13a: 0x30, 0x13b: 0x31, 0x13c: 0x32, 0x13d: 0x33, 0x13e: 0x34, 0x13f: 0x35, // Block 0x5, offset 0x140 0x140: 0x36, 0x141: 0x37, 0x142: 0x38, 0x144: 0x39, 0x145: 0x3a, 0x14d: 0x3b, 0x15c: 0x3c, 0x15d: 0x3d, 0x15e: 0x3e, 0x15f: 0x3f, 0x160: 0x40, 0x162: 0x41, 0x164: 0x42, 0x168: 0x43, 0x169: 0x44, 0x16a: 0x45, 0x16b: 0x46, 0x16c: 0x47, 0x16d: 0x48, 0x16e: 0x49, 0x16f: 0x4a, 0x170: 0x4b, 0x173: 0x4c, 0x177: 0x08, // Block 0x6, offset 0x180 0x180: 0x4d, 0x181: 0x4e, 0x182: 0x4f, 0x183: 0x50, 0x184: 0x51, 0x185: 0x52, 0x186: 0x53, 0x187: 0x54, 0x188: 0x55, 0x189: 0x56, 0x18a: 0x57, 0x18c: 0x58, 0x18f: 0x59, 0x191: 0x5a, 0x192: 0x5b, 0x193: 0x5c, 0x194: 0x5b, 0x195: 0x5d, 0x196: 0x5e, 0x197: 0x5f, 0x198: 0x60, 0x199: 0x61, 0x19a: 0x62, 0x19b: 0x63, 0x19c: 0x64, 0x19d: 0x65, 0x19e: 0x66, 0x1ac: 0x67, 0x1ad: 0x68, 0x1b3: 0x69, 0x1b5: 0x6a, 0x1b7: 0x6b, 0x1ba: 0x6c, 0x1bb: 0x6d, 0x1bc: 0x39, 0x1bd: 0x39, 0x1be: 0x39, 0x1bf: 0x6e, // Block 0x7, offset 0x1c0 0x1c0: 0x6f, 0x1c1: 0x70, 
0x1c2: 0x71, 0x1c3: 0x39, 0x1c4: 0x72, 0x1c5: 0x39, 0x1c6: 0x73, 0x1c7: 0x74, 0x1c8: 0x75, 0x1c9: 0x76, 0x1ca: 0x39, 0x1cb: 0x39, 0x1cc: 0x39, 0x1cd: 0x39, 0x1ce: 0x39, 0x1cf: 0x39, 0x1d0: 0x39, 0x1d1: 0x39, 0x1d2: 0x39, 0x1d3: 0x39, 0x1d4: 0x39, 0x1d5: 0x39, 0x1d6: 0x39, 0x1d7: 0x39, 0x1d8: 0x39, 0x1d9: 0x39, 0x1da: 0x39, 0x1db: 0x39, 0x1dc: 0x39, 0x1dd: 0x39, 0x1de: 0x39, 0x1df: 0x39, 0x1e0: 0x39, 0x1e1: 0x39, 0x1e2: 0x39, 0x1e3: 0x39, 0x1e4: 0x39, 0x1e5: 0x39, 0x1e6: 0x39, 0x1e7: 0x39, 0x1e8: 0x39, 0x1e9: 0x39, 0x1ea: 0x39, 0x1eb: 0x39, 0x1ec: 0x39, 0x1ed: 0x39, 0x1ee: 0x39, 0x1ef: 0x39, 0x1f0: 0x39, 0x1f1: 0x39, 0x1f2: 0x39, 0x1f3: 0x39, 0x1f4: 0x39, 0x1f5: 0x39, 0x1f6: 0x39, 0x1f7: 0x39, 0x1f8: 0x39, 0x1f9: 0x39, 0x1fa: 0x39, 0x1fb: 0x39, 0x1fc: 0x39, 0x1fd: 0x39, 0x1fe: 0x39, 0x1ff: 0x39, // Block 0x8, offset 0x200 0x200: 0x39, 0x201: 0x39, 0x202: 0x39, 0x203: 0x39, 0x204: 0x39, 0x205: 0x39, 0x206: 0x39, 0x207: 0x39, 0x208: 0x39, 0x209: 0x39, 0x20a: 0x39, 0x20b: 0x39, 0x20c: 0x39, 0x20d: 0x39, 0x20e: 0x39, 0x20f: 0x39, 0x210: 0x39, 0x211: 0x39, 0x212: 0x39, 0x213: 0x39, 0x214: 0x39, 0x215: 0x39, 0x216: 0x39, 0x217: 0x39, 0x218: 0x39, 0x219: 0x39, 0x21a: 0x39, 0x21b: 0x39, 0x21c: 0x39, 0x21d: 0x39, 0x21e: 0x39, 0x21f: 0x39, 0x220: 0x39, 0x221: 0x39, 0x222: 0x39, 0x223: 0x39, 0x224: 0x39, 0x225: 0x39, 0x226: 0x39, 0x227: 0x39, 0x228: 0x39, 0x229: 0x39, 0x22a: 0x39, 0x22b: 0x39, 0x22c: 0x39, 0x22d: 0x39, 0x22e: 0x39, 0x22f: 0x39, 0x230: 0x39, 0x231: 0x39, 0x232: 0x39, 0x233: 0x39, 0x234: 0x39, 0x235: 0x39, 0x236: 0x39, 0x237: 0x39, 0x238: 0x39, 0x239: 0x39, 0x23a: 0x39, 0x23b: 0x39, 0x23c: 0x39, 0x23d: 0x39, 0x23e: 0x39, 0x23f: 0x39, // Block 0x9, offset 0x240 0x240: 0x39, 0x241: 0x39, 0x242: 0x39, 0x243: 0x39, 0x244: 0x39, 0x245: 0x39, 0x246: 0x39, 0x247: 0x39, 0x248: 0x39, 0x249: 0x39, 0x24a: 0x39, 0x24b: 0x39, 0x24c: 0x39, 0x24d: 0x39, 0x24e: 0x39, 0x24f: 0x39, 0x250: 0x39, 0x251: 0x39, 0x252: 0x77, 0x253: 0x78, 0x259: 0x79, 0x25a: 0x7a, 0x25b: 0x7b, 0x260: 
0x7c, 0x263: 0x7d, 0x264: 0x7e, 0x265: 0x7f, 0x266: 0x80, 0x267: 0x81, 0x268: 0x82, 0x269: 0x83, 0x26a: 0x84, 0x26b: 0x85, 0x26f: 0x86, 0x270: 0x39, 0x271: 0x39, 0x272: 0x39, 0x273: 0x39, 0x274: 0x39, 0x275: 0x39, 0x276: 0x39, 0x277: 0x39, 0x278: 0x39, 0x279: 0x39, 0x27a: 0x39, 0x27b: 0x39, 0x27c: 0x39, 0x27d: 0x39, 0x27e: 0x39, 0x27f: 0x39, // Block 0xa, offset 0x280 0x280: 0x39, 0x281: 0x39, 0x282: 0x39, 0x283: 0x39, 0x284: 0x39, 0x285: 0x39, 0x286: 0x39, 0x287: 0x39, 0x288: 0x39, 0x289: 0x39, 0x28a: 0x39, 0x28b: 0x39, 0x28c: 0x39, 0x28d: 0x39, 0x28e: 0x39, 0x28f: 0x39, 0x290: 0x39, 0x291: 0x39, 0x292: 0x39, 0x293: 0x39, 0x294: 0x39, 0x295: 0x39, 0x296: 0x39, 0x297: 0x39, 0x298: 0x39, 0x299: 0x39, 0x29a: 0x39, 0x29b: 0x39, 0x29c: 0x39, 0x29d: 0x39, 0x29e: 0x87, // Block 0xb, offset 0x2c0 0x2c0: 0x5b, 0x2c1: 0x5b, 0x2c2: 0x5b, 0x2c3: 0x5b, 0x2c4: 0x5b, 0x2c5: 0x5b, 0x2c6: 0x5b, 0x2c7: 0x5b, 0x2c8: 0x5b, 0x2c9: 0x5b, 0x2ca: 0x5b, 0x2cb: 0x5b, 0x2cc: 0x5b, 0x2cd: 0x5b, 0x2ce: 0x5b, 0x2cf: 0x5b, 0x2d0: 0x5b, 0x2d1: 0x5b, 0x2d2: 0x5b, 0x2d3: 0x5b, 0x2d4: 0x5b, 0x2d5: 0x5b, 0x2d6: 0x5b, 0x2d7: 0x5b, 0x2d8: 0x5b, 0x2d9: 0x5b, 0x2da: 0x5b, 0x2db: 0x5b, 0x2dc: 0x5b, 0x2dd: 0x5b, 0x2de: 0x5b, 0x2df: 0x5b, 0x2e0: 0x5b, 0x2e1: 0x5b, 0x2e2: 0x5b, 0x2e3: 0x5b, 0x2e4: 0x5b, 0x2e5: 0x5b, 0x2e6: 0x5b, 0x2e7: 0x5b, 0x2e8: 0x5b, 0x2e9: 0x5b, 0x2ea: 0x5b, 0x2eb: 0x5b, 0x2ec: 0x5b, 0x2ed: 0x5b, 0x2ee: 0x5b, 0x2ef: 0x5b, 0x2f0: 0x5b, 0x2f1: 0x5b, 0x2f2: 0x5b, 0x2f3: 0x5b, 0x2f4: 0x5b, 0x2f5: 0x5b, 0x2f6: 0x5b, 0x2f7: 0x5b, 0x2f8: 0x5b, 0x2f9: 0x5b, 0x2fa: 0x5b, 0x2fb: 0x5b, 0x2fc: 0x5b, 0x2fd: 0x5b, 0x2fe: 0x5b, 0x2ff: 0x5b, // Block 0xc, offset 0x300 0x300: 0x5b, 0x301: 0x5b, 0x302: 0x5b, 0x303: 0x5b, 0x304: 0x5b, 0x305: 0x5b, 0x306: 0x5b, 0x307: 0x5b, 0x308: 0x5b, 0x309: 0x5b, 0x30a: 0x5b, 0x30b: 0x5b, 0x30c: 0x5b, 0x30d: 0x5b, 0x30e: 0x5b, 0x30f: 0x5b, 0x310: 0x5b, 0x311: 0x5b, 0x312: 0x5b, 0x313: 0x5b, 0x314: 0x5b, 0x315: 0x5b, 0x316: 0x5b, 0x317: 0x5b, 0x318: 0x5b, 0x319: 0x5b, 
0x31a: 0x5b, 0x31b: 0x5b, 0x31c: 0x5b, 0x31d: 0x5b, 0x31e: 0x5b, 0x31f: 0x5b, 0x320: 0x5b, 0x321: 0x5b, 0x322: 0x5b, 0x323: 0x5b, 0x324: 0x39, 0x325: 0x39, 0x326: 0x39, 0x327: 0x39, 0x328: 0x39, 0x329: 0x39, 0x32a: 0x39, 0x32b: 0x39, 0x32c: 0x88, 0x338: 0x89, 0x339: 0x8a, 0x33b: 0x6a, 0x33c: 0x70, 0x33d: 0x8b, 0x33f: 0x8c, // Block 0xd, offset 0x340 0x347: 0x8d, 0x34b: 0x8e, 0x34d: 0x8f, 0x368: 0x90, 0x36b: 0x91, 0x374: 0x92, 0x37a: 0x93, 0x37b: 0x94, 0x37d: 0x95, 0x37e: 0x96, // Block 0xe, offset 0x380 0x380: 0x97, 0x381: 0x98, 0x382: 0x99, 0x383: 0x9a, 0x384: 0x9b, 0x385: 0x9c, 0x386: 0x9d, 0x387: 0x9e, 0x388: 0x9f, 0x389: 0x2c, 0x38b: 0xa0, 0x38c: 0x2a, 0x38d: 0xa1, 0x390: 0xa2, 0x391: 0xa3, 0x392: 0xa4, 0x393: 0xa5, 0x396: 0xa6, 0x397: 0xa7, 0x398: 0xa8, 0x399: 0xa9, 0x39a: 0xaa, 0x39c: 0xab, 0x3a0: 0xac, 0x3a4: 0xad, 0x3a5: 0xae, 0x3a7: 0xaf, 0x3a8: 0xb0, 0x3a9: 0xb1, 0x3aa: 0xb2, 0x3b0: 0xb3, 0x3b2: 0xb4, 0x3b4: 0xb5, 0x3b5: 0xb6, 0x3b6: 0xb7, 0x3bb: 0xb8, 0x3bc: 0xb9, 0x3bd: 0xba, // Block 0xf, offset 0x3c0 0x3d0: 0x45, 0x3d1: 0xbb, // Block 0x10, offset 0x400 0x42b: 0xbc, 0x42c: 0xbd, 0x43d: 0xbe, 0x43e: 0xbf, 0x43f: 0xc0, // Block 0x11, offset 0x440 0x440: 0x39, 0x441: 0x39, 0x442: 0x39, 0x443: 0x39, 0x444: 0x39, 0x445: 0x39, 0x446: 0x39, 0x447: 0x39, 0x448: 0x39, 0x449: 0x39, 0x44a: 0x39, 0x44b: 0x39, 0x44c: 0x39, 0x44d: 0x39, 0x44e: 0x39, 0x44f: 0x39, 0x450: 0x39, 0x451: 0x39, 0x452: 0x39, 0x453: 0x39, 0x454: 0x39, 0x455: 0x39, 0x456: 0x39, 0x457: 0x39, 0x458: 0x39, 0x459: 0x39, 0x45a: 0x39, 0x45b: 0x39, 0x45c: 0x39, 0x45d: 0x39, 0x45e: 0x39, 0x45f: 0x39, 0x460: 0x39, 0x461: 0x39, 0x462: 0x39, 0x463: 0x39, 0x464: 0x39, 0x465: 0x39, 0x466: 0x39, 0x467: 0x39, 0x468: 0x39, 0x469: 0x39, 0x46a: 0x39, 0x46b: 0x39, 0x46c: 0x39, 0x46d: 0x39, 0x46e: 0x39, 0x46f: 0x39, 0x470: 0x39, 0x471: 0x39, 0x472: 0x39, 0x473: 0xc1, 0x474: 0xc2, 0x476: 0x39, 0x477: 0xc3, // Block 0x12, offset 0x480 0x4bf: 0xc4, // Block 0x13, offset 0x4c0 0x4c0: 0x39, 0x4c1: 0x39, 0x4c2: 0x39, 
0x4c3: 0x39, 0x4c4: 0xc5, 0x4c5: 0xc6, 0x4c6: 0x39, 0x4c7: 0x39, 0x4c8: 0x39, 0x4c9: 0x39, 0x4ca: 0x39, 0x4cb: 0xc7, 0x4f2: 0xc8, // Block 0x14, offset 0x500 0x53c: 0xc9, 0x53d: 0xca, // Block 0x15, offset 0x540 0x545: 0xcb, 0x546: 0xcc, 0x549: 0xcd, 0x54c: 0x39, 0x54d: 0xce, 0x568: 0xcf, 0x569: 0xd0, 0x56a: 0xd1, // Block 0x16, offset 0x580 0x580: 0xd2, 0x582: 0xbe, 0x584: 0xbd, 0x58a: 0xd3, 0x58b: 0xd4, 0x593: 0xd4, 0x5a3: 0xd5, 0x5a5: 0xd6, // Block 0x17, offset 0x5c0 0x5c0: 0xd7, 0x5c3: 0xd8, 0x5c4: 0xd9, 0x5c5: 0xda, 0x5c6: 0xdb, 0x5c7: 0xdc, 0x5c8: 0xdd, 0x5c9: 0xde, 0x5cc: 0xdf, 0x5cd: 0xe0, 0x5ce: 0xe1, 0x5cf: 0xe2, 0x5d0: 0xe3, 0x5d1: 0xe4, 0x5d2: 0x39, 0x5d3: 0xe5, 0x5d4: 0xe6, 0x5d5: 0xe7, 0x5d6: 0xe8, 0x5d7: 0xe9, 0x5d8: 0x39, 0x5d9: 0xea, 0x5da: 0x39, 0x5db: 0xeb, 0x5df: 0xec, 0x5e4: 0xed, 0x5e5: 0xee, 0x5e6: 0x39, 0x5e7: 0x39, 0x5e9: 0xef, 0x5ea: 0xf0, 0x5eb: 0xf1, // Block 0x18, offset 0x600 0x600: 0x39, 0x601: 0x39, 0x602: 0x39, 0x603: 0x39, 0x604: 0x39, 0x605: 0x39, 0x606: 0x39, 0x607: 0x39, 0x608: 0x39, 0x609: 0x39, 0x60a: 0x39, 0x60b: 0x39, 0x60c: 0x39, 0x60d: 0x39, 0x60e: 0x39, 0x60f: 0x39, 0x610: 0x39, 0x611: 0x39, 0x612: 0x39, 0x613: 0x39, 0x614: 0x39, 0x615: 0x39, 0x616: 0x39, 0x617: 0x39, 0x618: 0x39, 0x619: 0x39, 0x61a: 0x39, 0x61b: 0x39, 0x61c: 0x39, 0x61d: 0x39, 0x61e: 0x39, 0x61f: 0x39, 0x620: 0x39, 0x621: 0x39, 0x622: 0x39, 0x623: 0x39, 0x624: 0x39, 0x625: 0x39, 0x626: 0x39, 0x627: 0x39, 0x628: 0x39, 0x629: 0x39, 0x62a: 0x39, 0x62b: 0x39, 0x62c: 0x39, 0x62d: 0x39, 0x62e: 0x39, 0x62f: 0x39, 0x630: 0x39, 0x631: 0x39, 0x632: 0x39, 0x633: 0x39, 0x634: 0x39, 0x635: 0x39, 0x636: 0x39, 0x637: 0x39, 0x638: 0x39, 0x639: 0x39, 0x63a: 0x39, 0x63b: 0x39, 0x63c: 0x39, 0x63d: 0x39, 0x63e: 0x39, 0x63f: 0xe6, // Block 0x19, offset 0x640 0x650: 0x0b, 0x651: 0x0c, 0x653: 0x0d, 0x656: 0x0e, 0x657: 0x06, 0x658: 0x0f, 0x65a: 0x10, 0x65b: 0x11, 0x65c: 0x12, 0x65d: 0x13, 0x65e: 0x14, 0x65f: 0x15, 0x660: 0x06, 0x661: 0x06, 0x662: 0x06, 0x663: 0x06, 0x664: 
0x06, 0x665: 0x06, 0x666: 0x06, 0x667: 0x06, 0x668: 0x06, 0x669: 0x06, 0x66a: 0x06, 0x66b: 0x06, 0x66c: 0x06, 0x66d: 0x06, 0x66e: 0x06, 0x66f: 0x16, 0x670: 0x06, 0x671: 0x06, 0x672: 0x06, 0x673: 0x06, 0x674: 0x06, 0x675: 0x06, 0x676: 0x06, 0x677: 0x06, 0x678: 0x06, 0x679: 0x06, 0x67a: 0x06, 0x67b: 0x06, 0x67c: 0x06, 0x67d: 0x06, 0x67e: 0x06, 0x67f: 0x16, // Block 0x1a, offset 0x680 0x680: 0xf2, 0x681: 0x08, 0x684: 0x08, 0x685: 0x08, 0x686: 0x08, 0x687: 0x09, // Block 0x1b, offset 0x6c0 0x6c0: 0x5b, 0x6c1: 0x5b, 0x6c2: 0x5b, 0x6c3: 0x5b, 0x6c4: 0x5b, 0x6c5: 0x5b, 0x6c6: 0x5b, 0x6c7: 0x5b, 0x6c8: 0x5b, 0x6c9: 0x5b, 0x6ca: 0x5b, 0x6cb: 0x5b, 0x6cc: 0x5b, 0x6cd: 0x5b, 0x6ce: 0x5b, 0x6cf: 0x5b, 0x6d0: 0x5b, 0x6d1: 0x5b, 0x6d2: 0x5b, 0x6d3: 0x5b, 0x6d4: 0x5b, 0x6d5: 0x5b, 0x6d6: 0x5b, 0x6d7: 0x5b, 0x6d8: 0x5b, 0x6d9: 0x5b, 0x6da: 0x5b, 0x6db: 0x5b, 0x6dc: 0x5b, 0x6dd: 0x5b, 0x6de: 0x5b, 0x6df: 0x5b, 0x6e0: 0x5b, 0x6e1: 0x5b, 0x6e2: 0x5b, 0x6e3: 0x5b, 0x6e4: 0x5b, 0x6e5: 0x5b, 0x6e6: 0x5b, 0x6e7: 0x5b, 0x6e8: 0x5b, 0x6e9: 0x5b, 0x6ea: 0x5b, 0x6eb: 0x5b, 0x6ec: 0x5b, 0x6ed: 0x5b, 0x6ee: 0x5b, 0x6ef: 0x5b, 0x6f0: 0x5b, 0x6f1: 0x5b, 0x6f2: 0x5b, 0x6f3: 0x5b, 0x6f4: 0x5b, 0x6f5: 0x5b, 0x6f6: 0x5b, 0x6f7: 0x5b, 0x6f8: 0x5b, 0x6f9: 0x5b, 0x6fa: 0x5b, 0x6fb: 0x5b, 0x6fc: 0x5b, 0x6fd: 0x5b, 0x6fe: 0x5b, 0x6ff: 0xf3, // Block 0x1c, offset 0x700 0x720: 0x18, 0x730: 0x09, 0x731: 0x09, 0x732: 0x09, 0x733: 0x09, 0x734: 0x09, 0x735: 0x09, 0x736: 0x09, 0x737: 0x09, 0x738: 0x09, 0x739: 0x09, 0x73a: 0x09, 0x73b: 0x09, 0x73c: 0x09, 0x73d: 0x09, 0x73e: 0x09, 0x73f: 0x19, // Block 0x1d, offset 0x740 0x740: 0x09, 0x741: 0x09, 0x742: 0x09, 0x743: 0x09, 0x744: 0x09, 0x745: 0x09, 0x746: 0x09, 0x747: 0x09, 0x748: 0x09, 0x749: 0x09, 0x74a: 0x09, 0x74b: 0x09, 0x74c: 0x09, 0x74d: 0x09, 0x74e: 0x09, 0x74f: 0x19, } golang-github-clipperhouse-displaywidth-0.11.0+ds/truncate.go000066400000000000000000000117051515060771000243070ustar00rootroot00000000000000package displaywidth import ( "strings" 
"github.com/clipperhouse/uax29/v2/graphemes"
)

// TruncateString shortens s so that it fits in maxWidth display columns,
// appending tail when (and only when) something was cut off.
//
// The kept prefix is made of whole grapheme clusters whose combined width,
// plus the width of tail, does not exceed maxWidth. A string that already
// fits within maxWidth is returned unchanged.
//
// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences found
// after the cut are appended after the tail, so that sequences such as SGR
// resets survive truncation and colors do not bleed into later terminal
// output.
//
// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values
// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them
// during truncation can shift byte boundaries and form unintended visible
// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width
// measurement.
func (options Options) TruncateString(s string, maxWidth int, tail string) string {
	// Truncation never honors the 8-bit option; see the doc comment.
	options.ControlSequences8Bit = false

	// budget is the width left for the kept prefix once tail is accounted for.
	budget := maxWidth - options.String(tail)

	cut, width := 0, 0
	iter := graphemes.FromString(s)
	iter.AnsiEscapeSequences = options.ControlSequences
	for iter.Next() {
		w := graphemeWidth(iter.Value(), options)
		if width+w <= budget {
			// This cluster still leaves room for tail: move the cut past it.
			cut = iter.End()
		}
		width += w
		if width <= maxWidth {
			continue
		}

		// s is too wide. Without escape handling the answer is simply the
		// prefix followed by tail.
		if !options.ControlSequences {
			return s[:cut] + tail
		}

		var out strings.Builder
		out.Grow(len(s) + len(tail)) // at most original + tail
		out.WriteString(s[:cut])
		out.WriteString(tail)

		// Carry over the escape sequences that were cut away.
		rest := graphemes.FromString(s[cut:])
		rest.AnsiEscapeSequences = options.ControlSequences
		for rest.Next() {
			seq := rest.Value()
			// Keep only 7-bit escapes (leading ESC, 0x1B) that measure as
			// zero-width on their own; some sequences (e.g. SOS) are only
			// valid in their original context.
			if len(seq) == 0 || seq[0] != 0x1B || options.String(seq) != 0 {
				continue
			}
			out.WriteString(seq)
		}
		return out.String()
	}

	// Everything fit: nothing to truncate.
	return s
}

// TruncateString shortens s to maxWidth display columns using
// [DefaultOptions], appending tail if s was truncated.
//
// See [Options.TruncateString] for the details of how the cut is chosen.
func TruncateString(s string, maxWidth int, tail string) string {
	return DefaultOptions.TruncateString(s, maxWidth, tail)
}

// TruncateBytes shortens s so that it fits in maxWidth display columns,
// appending tail when (and only when) something was cut off.
//
// The kept prefix is made of whole grapheme clusters whose combined width,
// plus the width of tail, does not exceed maxWidth. A slice that already fits
// within maxWidth is returned as-is (the same slice, not a copy); a truncated
// result is always a newly allocated slice.
//
// When [Options.ControlSequences] is true, 7-bit ANSI escape sequences found
// after the cut are appended after the tail, so that sequences such as SGR
// resets survive truncation and colors do not bleed into later terminal
// output.
//
// [Options.ControlSequences8Bit] is ignored by truncation. 8-bit C1 byte values
// (0x80-0x9F) overlap with UTF-8 multi-byte encoding, so manipulating them
// during truncation can shift byte boundaries and form unintended visible
// characters. Use [Options.String] or [Options.Bytes] for 8-bit-aware width
// measurement.
func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
	// Truncation never honors the 8-bit option; see the doc comment.
	options.ControlSequences8Bit = false

	// budget is the width left for the kept prefix once tail is accounted for.
	budget := maxWidth - options.Bytes(tail)

	cut, width := 0, 0
	iter := graphemes.FromBytes(s)
	iter.AnsiEscapeSequences = options.ControlSequences
	for iter.Next() {
		w := graphemeWidth(iter.Value(), options)
		if width+w <= budget {
			// This cluster still leaves room for tail: move the cut past it.
			cut = iter.End()
		}
		width += w
		if width <= maxWidth {
			continue
		}

		// s is too wide. Size the result for the worst case of its branch:
		// prefix + tail, or original + tail when escapes may be re-appended.
		capacity := cut + len(tail)
		if options.ControlSequences {
			capacity = len(s) + len(tail)
		}
		out := make([]byte, 0, capacity)
		out = append(out, s[:cut]...)
		out = append(out, tail...)
		if !options.ControlSequences {
			return out
		}

		// Carry over the escape sequences that were cut away.
		rest := graphemes.FromBytes(s[cut:])
		rest.AnsiEscapeSequences = options.ControlSequences
		for rest.Next() {
			seq := rest.Value()
			// Keep only 7-bit escapes (leading ESC, 0x1B) that measure as
			// zero-width on their own; some sequences (e.g. SOS) are only
			// valid in their original context.
			if len(seq) == 0 || seq[0] != 0x1B || options.Bytes(seq) != 0 {
				continue
			}
			out = append(out, seq...)
		}
		return out
	}

	// Everything fit: nothing to truncate.
	return s
}

// TruncateBytes truncates a []byte to the given maxWidth, and appends the
// given tail if the []byte is truncated.
//
// It ensures the total width, including the width of the tail, is less than or
// equal to maxWidth.
func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte { return DefaultOptions.TruncateBytes(s, maxWidth, tail) } golang-github-clipperhouse-displaywidth-0.11.0+ds/truncate_bench_test.go000066400000000000000000000037561515060771000265140ustar00rootroot00000000000000package displaywidth import "testing" var csOptions = Options{ControlSequences: true} // Inputs for benchmarking truncation with trailing escape sequence preservation var ( // Short colored text with reset shortANSI = "\x1b[31mhello world\x1b[0m" // Multiple stacked SGR sequences stackedANSI = "\x1b[1m\x1b[31m\x1b[42mhello world, this is some longer text\x1b[0m" // Many interleaved color changes interleavedANSI = "hello \x1b[31mworld \x1b[32mfoo \x1b[33mbar \x1b[34mbaz \x1b[35mqux \x1b[36mend\x1b[0m" // Plain text (no escape sequences) — baseline plainText = "hello world, this is some plain text without escapes" ) func BenchmarkTruncateString(b *testing.B) { benchmarks := []struct { name string input string options Options }{ {"plain/default", plainText, defaultOptions}, {"plain/ControlSequences", plainText, csOptions}, {"short_ANSI/default", shortANSI, defaultOptions}, {"short_ANSI/ControlSequences", shortANSI, csOptions}, {"stacked_ANSI/ControlSequences", stackedANSI, csOptions}, {"interleaved_ANSI/ControlSequences", interleavedANSI, csOptions}, } for _, bm := range benchmarks { b.Run(bm.name, func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { _ = bm.options.TruncateString(bm.input, 5, "...") } }) } } var tail = []byte("...") func BenchmarkTruncateBytes(b *testing.B) { benchmarks := []struct { name string input []byte options Options }{ {"plain/default", []byte(plainText), defaultOptions}, {"plain/ControlSequences", []byte(plainText), csOptions}, {"short_ANSI/default", []byte(shortANSI), defaultOptions}, {"short_ANSI/ControlSequences", []byte(shortANSI), csOptions}, {"stacked_ANSI/ControlSequences", []byte(stackedANSI), csOptions}, {"interleaved_ANSI/ControlSequences", 
[]byte(interleavedANSI), csOptions}, } for _, bm := range benchmarks { b.Run(bm.name, func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { _ = bm.options.TruncateBytes(bm.input, 5, tail) } }) } } golang-github-clipperhouse-displaywidth-0.11.0+ds/width.go000066400000000000000000000134641515060771000236050ustar00rootroot00000000000000package displaywidth import ( "unicode/utf8" "github.com/clipperhouse/uax29/v2/graphemes" ) // String calculates the display width of a string, // by iterating over grapheme clusters in the string // and summing their widths. func String(s string) int { return DefaultOptions.String(s) } // String calculates the display width of a string, for the given options, by // iterating over grapheme clusters in the string and summing their widths. func (options Options) String(s string) int { width := 0 pos := 0 for pos < len(s) { // Try ASCII optimization asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { width += asciiLen pos += asciiLen continue } // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos for g.Next() { v := g.Value() width += graphemeWidth(v, options) pos += len(v) // Quick check: if remaining might have printable ASCII, break to outer loop if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } } // Defensive, should not happen: if no progress was made, // skip a byte to prevent infinite loop. Only applies if // the grapheme parser misbehaves. if pos == start { pos++ } } return width } // Bytes calculates the display width of a []byte, // by iterating over grapheme clusters in the byte slice // and summing their widths. func Bytes(s []byte) int { return DefaultOptions.Bytes(s) } // Bytes calculates the display width of a []byte, for the given options, by // iterating over grapheme clusters in the slice and summing their widths. 
func (options Options) Bytes(s []byte) int { width := 0 pos := 0 for pos < len(s) { // Try ASCII optimization asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { width += asciiLen pos += asciiLen continue } // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) g.AnsiEscapeSequences = options.ControlSequences g.AnsiEscapeSequences8Bit = options.ControlSequences8Bit start := pos for g.Next() { v := g.Value() width += graphemeWidth(v, options) pos += len(v) // Quick check: if remaining might have printable ASCII, break to outer loop if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } } // Defensive, should not happen: if no progress was made, // skip a byte to prevent infinite loop. Only applies if // the grapheme parser misbehaves. if pos == start { pos++ } } return width } // Rune calculates the display width of a rune. You // should almost certainly use [String] or [Bytes] for // most purposes. // // The smallest unit of display width is a grapheme // cluster, not a rune. Iterating over runes to measure // width is incorrect in many cases. func Rune(r rune) int { return DefaultOptions.Rune(r) } // Rune calculates the display width of a rune, for the given options. // // You should almost certainly use [String] or [Bytes] for most purposes. // // The smallest unit of display width is a grapheme cluster, not a rune. // Iterating over runes to measure width is incorrect in many cases. func (options Options) Rune(r rune) int { if r < utf8.RuneSelf { return asciiWidth(byte(r)) } // Surrogates (U+D800-U+DFFF) are invalid UTF-8. if r >= 0xD800 && r <= 0xDFFF { return 0 } var buf [4]byte n := utf8.EncodeRune(buf[:], r) // Skip the grapheme iterator return graphemeWidth(buf[:n], options) } const _Default property = 0 // graphemeWidth returns the display width of a grapheme cluster. // The passed string must be a single grapheme cluster. 
func graphemeWidth[T ~string | []byte](s T, options Options) int { if len(s) == 0 { return 0 } // C1 controls (0x80-0x9F) are zero-width when 8-bit control sequences // are enabled. This must be checked before the single-byte optimization // below, which would otherwise return width 1 for these bytes. if options.ControlSequences8Bit && s[0] >= 0x80 && s[0] <= 0x9F { return 0 } // Optimization: single-byte graphemes need no property lookup if len(s) == 1 { return asciiWidth(s[0]) } // Multi-byte grapheme clusters led by a C0 control (0x00-0x1F) if s[0] <= 0x1F { return 0 } p, sz := lookup(s) prop := property(p) // Variation Selector 16 (VS16) requests emoji presentation if prop != _Wide && sz > 0 && len(s) >= sz+3 { vs := s[sz : sz+3] if isVS16(vs) { prop = _Wide } // VS15 (0x8E) requests text presentation but does not affect width, // in my reading of Unicode TR51. Falls through to return the base // character's property. } if options.EastAsianWidth && prop == _East_Asian_Ambiguous { prop = _Wide } if prop > upperBound { prop = _Default } return propertyWidths[prop] } func asciiWidth(b byte) int { if b <= 0x1F || b == 0x7F { return 0 } return 1 } // printableASCIILength returns the length of consecutive printable ASCII bytes // starting at the beginning of s. func printableASCIILength[T string | []byte](s T) int { i := 0 for ; i < len(s); i++ { b := s[i] // Printable ASCII is 0x20-0x7E (space through tilde) if b < 0x20 || b > 0x7E { break } } // If the next byte is non-ASCII (>= 0x80), back off by 1. The grapheme // parser may group the last ASCII byte with subsequent non-ASCII bytes, // such as combining marks. if i > 0 && i < len(s) && s[i] >= 0x80 { i-- } return i } // isVS16 checks if the slice matches VS16 (U+FE0F) UTF-8 encoding // (EF B8 8F). It assumes len(s) >= 3. 
func isVS16[T ~string | []byte](s T) bool { return s[0] == 0xEF && s[1] == 0xB8 && s[2] == 0x8F } // propertyWidths is a jump table of sorts, instead of a switch var propertyWidths = [4]int{ _Default: 1, _Zero_Width: 0, _Wide: 2, _East_Asian_Ambiguous: 1, } const upperBound = property(len(propertyWidths) - 1) golang-github-clipperhouse-displaywidth-0.11.0+ds/width_test.go000066400000000000000000002166361515060771000246520ustar00rootroot00000000000000package displaywidth import ( "bytes" "testing" ) var defaultOptions = Options{} var eawOptions = Options{EastAsianWidth: true} func TestStringWidth(t *testing.T) { tests := []struct { name string input string options Options expected int }{ // Basic ASCII characters {"empty string", "", defaultOptions, 0}, {"single ASCII", "a", defaultOptions, 1}, {"multiple ASCII", "hello", defaultOptions, 5}, {"ASCII with spaces", "hello world", defaultOptions, 11}, // Control characters (width 0) {"newline", "\n", defaultOptions, 0}, {"tab", "\t", defaultOptions, 0}, {"carriage return", "\r", defaultOptions, 0}, {"backspace", "\b", defaultOptions, 0}, // Mixed content {"ASCII with newline", "hello\nworld", defaultOptions, 10}, {"ASCII with tab", "hello\tworld", defaultOptions, 10}, // East Asian characters (should be in trie) {"CJK ideograph", "中", defaultOptions, 2}, {"CJK with ASCII", "hello中", defaultOptions, 7}, // Ambiguous characters {"ambiguous character", "★", defaultOptions, 1}, // Default narrow {"ambiguous character EAW", "★", eawOptions, 2}, // East Asian wide // Emoji {"emoji", "😀", defaultOptions, 2}, // Default emoji width {"checkered flag", "ðŸ", defaultOptions, 2}, // U+1F3C1 chequered flag is an emoji, width 2 // Invalid UTF-8 - the trie treats \xff as a valid character with default properties {"invalid UTF-8", "\xff", defaultOptions, 1}, {"partial UTF-8", "\xc2", defaultOptions, 1}, // Variation selectors - VS16 (U+FE0F) requests emoji, VS15 (U+FE0E) is a no-op per Unicode TR51 {"☺ text default", "☺", 
defaultOptions, 1}, // U+263A has text presentation by default {"â˜ºï¸ emoji with VS16", "☺ï¸", defaultOptions, 2}, // VS16 forces emoji presentation (width 2) {"⌛ emoji default", "⌛", defaultOptions, 2}, // U+231B has emoji presentation by default {"⌛︎ with VS15", "⌛︎", defaultOptions, 2}, // VS15 is a no-op, width remains 2 {"⤠text default", "â¤", defaultOptions, 1}, // U+2764 has text presentation by default {"â¤ï¸ emoji with VS16", "â¤ï¸", defaultOptions, 2}, // VS16 forces emoji presentation (width 2) {"✂ text default", "✂", defaultOptions, 1}, // U+2702 has text presentation by default {"âœ‚ï¸ emoji with VS16", "✂ï¸", defaultOptions, 2}, // VS16 forces emoji presentation (width 2) {"keycap 1ï¸âƒ£", "1ï¸âƒ£", defaultOptions, 2}, // Keycap sequence: 1 + VS16 + U+20E3 (always width 2) {"keycap #ï¸âƒ£", "#ï¸âƒ£", defaultOptions, 2}, // Keycap sequence: # + VS16 + U+20E3 (always width 2) // Flags (regional indicator pairs form a single grapheme, always width 2 per TR51) {"flag US", "🇺🇸", defaultOptions, 2}, {"flag JP", "🇯🇵", defaultOptions, 2}, {"text with flags", "Go 🇺🇸🚀", defaultOptions, 3 + 2 + 2}, // Partial ASCII optimization tests (8+ byte ASCII runs) {"ASCII 8 bytes then emoji", "12345678😀", defaultOptions, 8 + 2}, {"ASCII 16 bytes then CJK", "1234567890abcdef中", defaultOptions, 16 + 2}, {"emoji then ASCII 8 bytes", "😀12345678", defaultOptions, 2 + 8}, {"CJK then ASCII 16 bytes", "中1234567890abcdef", defaultOptions, 2 + 16}, {"ASCII-emoji-ASCII sandwich", "12345678😀abcdefgh", defaultOptions, 8 + 2 + 8}, {"short ASCII then emoji", "hello😀", defaultOptions, 5 + 2}, {"emoji-short ASCII-emoji", "😀abc😀", defaultOptions, 2 + 3 + 2}, {"long mixed", "Hello World! 
你好世界 12345678 emoji: 🎉🎊", defaultOptions, 42}, // 13 + 9 + 9 + 7 + 4 // ASCII with embedded control characters {"ASCII with null in middle", "hello\x00world", defaultOptions, 10}, // 5 + 0 + 5 {"ASCII with DEL in middle", "hello\x7Fworld", defaultOptions, 10}, // 5 + 0 + 5 {"ASCII with multiple controls", "a\x00b\tc\nd", defaultOptions, 4}, // 1 + 0 + 1 + 0 + 1 + 0 + 1 // Alternating short ASCII/non-ASCII sequences {"alternating ASCII-CJK", "a中bæ–‡c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 {"alternating CJK-ASCII", "中aæ–‡bå­—c", defaultOptions, 9}, // 2 + 1 + 2 + 1 + 2 + 1 {"single char alternating", "a😀b🎉c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 {"rapid alternation", "aã‚bã„cã†d", defaultOptions, 10}, // 1 + 2 + 1 + 2 + 1 + 2 + 1 } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.options.String(tt.input) if result != tt.expected { t.Errorf("StringWidth(%q, %v) = %d, want %d", tt.input, tt.options, result, tt.expected) } b := []byte(tt.input) result = tt.options.Bytes(b) if result != tt.expected { t.Errorf("BytesWidth(%q, %v) = %d, want %d", b, tt.options, result, tt.expected) } }) } } var controlSequences = Options{ControlSequences: true} var controlSequences8Bit = Options{ControlSequences8Bit: true} var controlSequencesBoth = Options{ControlSequences: true, ControlSequences8Bit: true} func TestAnsiEscapeSequences(t *testing.T) { tests := []struct { name string input string options Options expected int }{ // ANSI escape sequences (ECMA-48) should be zero width when parsed as single graphemes {"SGR red", "\x1b[31m", controlSequences, 0}, {"SGR reset", "\x1b[0m", controlSequences, 0}, {"SGR bold", "\x1b[1m", controlSequences, 0}, {"SGR 256-color", "\x1b[38;5;196m", controlSequences, 0}, {"SGR truecolor", "\x1b[38;2;255;0;0m", controlSequences, 0}, {"cursor up", "\x1b[A", controlSequences, 0}, {"cursor position", "\x1b[10;20H", controlSequences, 0}, {"erase in display", "\x1b[2J", controlSequences, 0}, // ANSI escape sequences mixed 
with visible text {"red hello", "\x1b[31mhello\x1b[0m", controlSequences, 5}, {"bold world", "\x1b[1mworld\x1b[0m", controlSequences, 5}, {"colored CJK", "\x1b[31m中文\x1b[0m", controlSequences, 4}, {"colored emoji", "\x1b[31m😀\x1b[0m", controlSequences, 2}, {"nested SGR", "\x1b[1m\x1b[31mhi\x1b[0m", controlSequences, 2}, // CR+LF as a multi-byte C0-led grapheme (zero width) {"CRLF", "\r\n", controlSequences, 0}, {"text with CRLF", "hello\r\nworld", controlSequences, 10}, // Without ControlSequences, ESC is zero width but the rest of the sequence is visible {"bare ESC default options", "\x1b", defaultOptions, 0}, {"SGR red default options", "\x1b[31m", defaultOptions, 4}, {"red hello default options", "\x1b[31mhello\x1b[0m", defaultOptions, 12}, // ControlSequences should not regress width for strings with no escape sequences {"plain ASCII with option", "hello", controlSequences, 5}, {"plain ASCII spaces with option", "hello world", controlSequences, 11}, {"CJK with option", "中文", controlSequences, 4}, {"emoji with option", "😀", controlSequences, 2}, {"flag with option", "🇺🇸", controlSequences, 2}, {"mixed with option", "hello中文😀", controlSequences, 5 + 4 + 2}, {"ambiguous with option", "★", controlSequences, 1}, {"combining mark with option", "é", controlSequences, 1}, {"control chars with option", "\t\n", controlSequences, 0}, {"empty with option", "", controlSequences, 0}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.options.String(tt.input) if result != tt.expected { t.Errorf("String(%q) = %d, want %d", tt.input, result, tt.expected) } result = tt.options.Bytes([]byte(tt.input)) if result != tt.expected { t.Errorf("Bytes(%q) = %d, want %d", tt.input, result, tt.expected) } }) } } func TestAnsiEscapeSequences8Bit(t *testing.T) { tests := []struct { name string input string options Options expected int }{ // 8-bit C1 CSI sequences should be zero width {"C1 CSI red", "\x9B31m", controlSequences8Bit, 0}, {"C1 CSI reset", "\x9B0m", 
controlSequences8Bit, 0}, {"C1 CSI bold", "\x9B1m", controlSequences8Bit, 0}, {"C1 CSI multi-param", "\x9B1;2;3m", controlSequences8Bit, 0}, {"C1 CSI cursor up", "\x9BA", controlSequences8Bit, 0}, // 8-bit C1 OSC/DCS/SOS/APC with C1 ST terminator {"C1 OSC with ST", "\x9D0;Title\x9C", controlSequences8Bit, 0}, {"C1 OSC with BEL", "\x9D0;Title\x07", controlSequences8Bit, 0}, {"C1 DCS with ST", "\x90qpayload\x9C", controlSequences8Bit, 0}, {"C1 SOS with ST", "\x98hello\x9C", controlSequences8Bit, 0}, {"C1 APC with ST", "\x9Fdata\x9C", controlSequences8Bit, 0}, // Standalone C1 controls (single byte, no body) {"C1 IND", "\x84", controlSequences8Bit, 0}, {"C1 NEL", "\x85", controlSequences8Bit, 0}, // 8-bit sequences mixed with visible text {"C1 CSI red hello", "\x9B31mhello\x9B0m", controlSequences8Bit, 5}, {"C1 CSI colored CJK", "\x9B31m中文\x9B0m", controlSequences8Bit, 4}, {"C1 CSI colored emoji", "\x9B31m😀\x9B0m", controlSequences8Bit, 2}, {"C1 CSI nested", "\x9B1m\x9B31mhi\x9B0m", controlSequences8Bit, 2}, // Without ControlSequences8Bit, C1 bytes have width per asciiWidth (1 for >= 0x80) {"C1 CSI default options", "\x9B31m", defaultOptions, 4}, // 8-bit option should not regress plain text {"plain ASCII with 8-bit option", "hello", controlSequences8Bit, 5}, {"CJK with 8-bit option", "中文", controlSequences8Bit, 4}, {"emoji with 8-bit option", "😀", controlSequences8Bit, 2}, {"empty with 8-bit option", "", controlSequences8Bit, 0}, // Both options enabled {"both: 7-bit SGR", "\x1b[31mhello\x1b[0m", controlSequencesBoth, 5}, {"both: 8-bit CSI", "\x9B31mhello\x9B0m", controlSequencesBoth, 5}, {"both: mixed 7 and 8-bit", "\x1b[31mhello\x9B0m", controlSequencesBoth, 5}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := tt.options.String(tt.input) if result != tt.expected { t.Errorf("String(%q) = %d, want %d", tt.input, result, tt.expected) } result = tt.options.Bytes([]byte(tt.input)) if result != tt.expected { t.Errorf("Bytes(%q) = %d, want %d", 
tt.input, result, tt.expected)
			}
		})
	}
}

// TestAnsiEscapeSequencesIndependence exercises the 7-bit and 8-bit control
// sequence options separately, together, and not at all. Turning on one
// option must leave the other family's sequences counted as ordinary visible
// characters; only the family that is enabled collapses to zero width.
func TestAnsiEscapeSequencesIndependence(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		options  Options
		expected int
		desc     string
	}{
		// 7-bit only: C1 bytes must NOT be treated as escape sequences.
		// \x9B31m is 4 visible chars (0x9B has width 1, '3' '1' 'm' each width 1)
		{"7-bit on, 8-bit input C1 CSI", "\x9B31m", controlSequences, 4,
			"C1 CSI should not be recognized when only 7-bit is enabled"},
		{"7-bit on, 8-bit input standalone C1", "\x84", controlSequences, 1,
			"Standalone C1 byte should have width 1 when only 7-bit is enabled"},
		{"7-bit on, 8-bit input C1 with text", "\x9B31mhello\x9B0m", controlSequences, 4 + 5 + 3,
			"C1 CSI sequences should contribute visible width when only 7-bit is enabled"},

		// 8-bit only: 7-bit ESC sequences must NOT be treated as escape sequences.
		// \x1b[31m is: ESC (width 0) + '[' (1) + '3' (1) + '1' (1) + 'm' (1) = 4
		{"8-bit on, 7-bit input SGR", "\x1b[31m", controlSequences8Bit, 4,
			"7-bit SGR should not be recognized when only 8-bit is enabled"},
		{"8-bit on, 7-bit input SGR with text", "\x1b[31mhello\x1b[0m", controlSequences8Bit, 4 + 5 + 3,
			"7-bit SGR should contribute visible width when only 8-bit is enabled"},

		// Both enabled: both kinds should be zero-width
		{"both on, 7-bit SGR", "\x1b[31m", controlSequencesBoth, 0,
			"7-bit SGR should be zero-width when both are enabled"},
		{"both on, 8-bit CSI", "\x9B31m", controlSequencesBoth, 0,
			"C1 CSI should be zero-width when both are enabled"},
		{"both on, mixed sequences with text", "\x1b[31mhello\x9B0m", controlSequencesBoth, 5,
			"Mixed 7-bit and 8-bit sequences should both be zero-width"},

		// Neither enabled: both kinds contribute visible width
		{"neither, 7-bit SGR", "\x1b[31m", defaultOptions, 4,
			"7-bit SGR should contribute visible width when neither is enabled"},
		{"neither, 8-bit CSI", "\x9B31m", defaultOptions, 4,
			"C1 CSI should contribute visible width when neither is enabled"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The string and byte-slice entry points must agree on every case.
			if got := tc.options.String(tc.input); got != tc.expected {
				t.Errorf("String(%q) = %d, want %d (%s)", tc.input, got, tc.expected, tc.desc)
			}
			if got := tc.options.Bytes([]byte(tc.input)); got != tc.expected {
				t.Errorf("Bytes(%q) = %d, want %d (%s)", tc.input, got, tc.expected, tc.desc)
			}
		})
	}
}

func TestRuneWidth(t *testing.T) {
	tests := []struct {
		name     string
		input    rune
		options  Options
		expected int
	}{
		// Control characters (width 0)
		{"null char", '\x00', defaultOptions, 0},
		{"bell", '\x07', defaultOptions, 0},
{"backspace", '\x08', defaultOptions, 0},
		{"tab", '\t', defaultOptions, 0},
		{"newline", '\n', defaultOptions, 0},
		{"carriage return", '\r', defaultOptions, 0},
		{"escape", '\x1B', defaultOptions, 0},
		{"delete", '\x7F', defaultOptions, 0},

		// Combining marks - when tested standalone as runes, they have width 0
		// (In actual strings with grapheme clusters, they combine and have width 0)
		{"combining grave accent", '\u0300', defaultOptions, 0},
		{"combining acute accent", '\u0301', defaultOptions, 0},
		{"combining diaeresis", '\u0308', defaultOptions, 0},
		{"combining tilde", '\u0303', defaultOptions, 0},

		// Zero width characters
		{"zero width space", '\u200B', defaultOptions, 0},
		{"zero width non-joiner", '\u200C', defaultOptions, 0},
		{"zero width joiner", '\u200D', defaultOptions, 0},

		// ASCII printable (width 1)
		{"space", ' ', defaultOptions, 1},
		{"letter a", 'a', defaultOptions, 1},
		{"letter Z", 'Z', defaultOptions, 1},
		{"digit 0", '0', defaultOptions, 1},
		{"digit 9", '9', defaultOptions, 1},
		{"exclamation", '!', defaultOptions, 1},
		{"at sign", '@', defaultOptions, 1},
		{"tilde", '~', defaultOptions, 1},

		// Latin extended (width 1)
		{"latin e with acute", '\u00E9', defaultOptions, 1},
		{"latin n with tilde", '\u00F1', defaultOptions, 1},
		{"latin o with diaeresis", '\u00F6', defaultOptions, 1},

		// East Asian Wide characters.
		// Written as escapes so the literals survive encoding round-trips.
		{"CJK ideograph", '\u4E2D', defaultOptions, 2},   // 中
		{"CJK ideograph", '\u6587', defaultOptions, 2},   // 文
		{"hiragana a", '\u3042', defaultOptions, 2},      // あ
		{"katakana a", '\u30A2', defaultOptions, 2},      // ア
		{"hangul syllable", '\uAC00', defaultOptions, 2}, // 가
		{"hangul syllable", '\uD55C', defaultOptions, 2}, // 한

		// Fullwidth characters
		{"fullwidth A", '\uFF21', defaultOptions, 2},
		{"fullwidth Z", '\uFF3A', defaultOptions, 2},
		{"fullwidth 0", '\uFF10', defaultOptions, 2},
		{"fullwidth 9", '\uFF19', defaultOptions, 2},
		{"fullwidth exclamation", '\uFF01', defaultOptions, 2},
		{"fullwidth space", '\u3000', defaultOptions, 2},

		// Ambiguous characters - default narrow
		{"black star default", '★', defaultOptions, 1},
		{"degree sign default", '°', defaultOptions, 1},
		{"plus-minus default", '±', defaultOptions, 1},
		{"section sign default", '§', defaultOptions, 1},
		{"copyright sign default", '©', defaultOptions, 1},
		{"registered sign default", '®', defaultOptions, 1},

		// Ambiguous characters - EastAsianWidth wide
		{"black star EAW", '★', eawOptions, 2},
		{"degree sign EAW", '°', eawOptions, 2},
		{"plus-minus EAW", '±', eawOptions, 2},
		{"section sign EAW", '§', eawOptions, 2},
		{"copyright sign EAW", '©', eawOptions, 1}, // Not in ambiguous category
		{"registered sign EAW", '®', eawOptions, 2},

		// Emoji (width 2)
		{"grinning face", '\U0001F600', defaultOptions, 2},
		{"grinning face with smiling eyes", '\U0001F601', defaultOptions, 2},
		{"smiling face with heart-eyes", '\U0001F60D', defaultOptions, 2},
		{"thinking face", '\U0001F914', defaultOptions, 2},
		{"rocket", '\U0001F680', defaultOptions, 2},
		{"party popper", '\U0001F389', defaultOptions, 2},
		{"fire", '\U0001F525', defaultOptions, 2},
		{"thumbs up", '\U0001F44D', defaultOptions, 2},
		{"red heart", '\u2764', defaultOptions, 1},          // Text presentation by default
		{"checkered flag", '\U0001F3C1', defaultOptions, 2}, // U+1F3C1 chequered flag

		// Mathematical symbols
		{"infinity", '∞', defaultOptions, 1},
		{"summation", '∑', defaultOptions, 1},
		{"integral", '∫', defaultOptions, 1},
		{"square root", '√', defaultOptions, 1},

		// Currency symbols
		{"dollar", '$', defaultOptions, 1},
		{"euro", '€', defaultOptions, 1},
		{"pound", '£', defaultOptions, 1},
		{"yen", '\u00A5', defaultOptions, 1},

		// Box drawing characters
		{"box light horizontal", '─', defaultOptions, 1},
		{"box light vertical", '│', defaultOptions, 1},
		{"box light down and right", '┌', defaultOptions, 1},

		// Special Unicode characters
		{"bullet", '•', defaultOptions, 1},
		{"ellipsis", '…', defaultOptions, 1},
		{"em dash", '—', defaultOptions, 1},
		{"left single quote", '\u2018', defaultOptions, 1},
		{"right single quote", '\u2019', defaultOptions, 1},

		// Test edge cases with options disabled
		{"ambiguous EAW disabled", '★', defaultOptions, 1},

		// Variation selectors (note: Rune() tests single runes without VS, not sequences)
		{"☺ U+263A text default", '☺', defaultOptions, 1},           // Has text presentation by default
		{"⌛ U+231B emoji default", '⌛', defaultOptions, 2},          // Has emoji presentation by default
		{"\u2764 U+2764 text default", '\u2764', defaultOptions, 1}, // Has text presentation by default
		{"✂ U+2702 text default", '✂', defaultOptions, 1},           // Has text presentation by default
		{"VS16 U+FE0F alone", '\ufe0f', defaultOptions, 0},          // Variation selectors are zero-width by themselves
		{"VS15 U+FE0E alone", '\ufe0e', defaultOptions, 0},          // Variation selectors are zero-width by themselves
		{"keycap U+20E3 alone", '\u20e3', defaultOptions, 0},        // Combining enclosing keycap is zero-width alone
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.options.Rune(tt.input)
			if result != tt.expected {
				// Name the method that was actually called (Options.Rune).
				t.Errorf("Rune(%q) with options %v = %d, want %d",
					tt.input, tt.options, result, tt.expected)
			}
		})
	}
}

// TestEmojiPresentation verifies correct width behavior for characters with different
// Emoji_Presentation property values according to TR51 conformance
func TestEmojiPresentation(t *testing.T) {
	tests := []struct {
		name         string
		input        string
		wantDefault  int
		wantWithVS16 int
		wantWithVS15 int
		description  string
	}{
		// Characters with Extended_Pictographic=Yes AND Emoji_Presentation=Yes
		// Should have width 2 by default (emoji presentation)
		// VS15 is a no-op per Unicode TR51 - it requests text presentation but doesn't change width
		{
			name:         "Watch (EP=Yes, EmojiPres=Yes)",
			input:        "\u231A",
			wantDefault:  2,
			wantWithVS16: 2,
			wantWithVS15: 2, // VS15 is a no-op, width remains 2
			description:  "⌚ U+231A has default emoji presentation",
		},
		{
			name:         "Hourglass (EP=Yes, EmojiPres=Yes)",
			input:        "\u231B",
			wantDefault:  2,
			wantWithVS16: 2,
			wantWithVS15: 2, // VS15 is a no-op, width remains 2
			description:  "⌛ U+231B has default emoji presentation",
		},
		{
			name:         "Fast-forward (EP=Yes, EmojiPres=Yes)",
			input:        "\u23E9",
			wantDefault:  2,
			wantWithVS16: 2,
			wantWithVS15: 2, // VS15 is a no-op, width remains 2
description: "â© U+23E9 has default emoji presentation", }, { name: "Alarm Clock (EP=Yes, EmojiPres=Yes)", input: "\u23F0", wantDefault: 2, wantWithVS16: 2, wantWithVS15: 2, // VS15 is a no-op, width remains 2 description: "â° U+23F0 has default emoji presentation", }, { name: "Soccer Ball (EP=Yes, EmojiPres=Yes)", input: "\u26BD", wantDefault: 2, wantWithVS16: 2, wantWithVS15: 2, // VS15 is a no-op, width remains 2 description: "âš½ U+26BD has default emoji presentation", }, { name: "Anchor (EP=Yes, EmojiPres=Yes)", input: "\u2693", wantDefault: 2, wantWithVS16: 2, wantWithVS15: 2, // VS15 is a no-op, width remains 2 description: "âš“ U+2693 has default emoji presentation", }, // Characters with Extended_Pictographic=Yes BUT Emoji_Presentation=No // Should have width 1 by default (text presentation) { name: "Star of David (EP=Yes, EmojiPres=No)", input: "\u2721", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "✡ U+2721 has default text presentation", }, { name: "Hammer and Pick (EP=Yes, EmojiPres=No)", input: "\u2692", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "âš’ U+2692 has default text presentation", }, { name: "Gear (EP=Yes, EmojiPres=No)", input: "\u2699", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "âš™ U+2699 has default text presentation", }, { name: "Star and Crescent (EP=Yes, EmojiPres=No)", input: "\u262A", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "☪ U+262A has default text presentation", }, { name: "Infinity (EP=Yes, EmojiPres=No)", input: "\u267E", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "♾ U+267E has default text presentation", }, { name: "Recycling Symbol (EP=Yes, EmojiPres=No)", input: "\u267B", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "â™» U+267B has default text presentation", }, // Characters with Emoji=Yes but NOT Extended_Pictographic // These are typically ASCII characters like # that can become emoji with VS16 { name: 
"Hash Sign (Emoji=Yes, EP=No)", input: "\u0023", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "# U+0023 has default text presentation", }, { name: "Asterisk (Emoji=Yes, EP=No)", input: "\u002A", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "* U+002A has default text presentation", }, { name: "Digit Zero (Emoji=Yes, EP=No)", input: "\u0030", wantDefault: 1, wantWithVS16: 2, wantWithVS15: 1, description: "0 U+0030 has default text presentation", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Test default width (no variation selector) gotDefault := String(tt.input) if gotDefault != tt.wantDefault { t.Errorf("String(%q) default = %d, want %d (%s)", tt.input, gotDefault, tt.wantDefault, tt.description) } // Test with VS16 (U+FE0F) for emoji presentation inputWithVS16 := tt.input + "\uFE0F" gotWithVS16 := String(inputWithVS16) if gotWithVS16 != tt.wantWithVS16 { t.Errorf("String(%q) with VS16 = %d, want %d (%s)", tt.input, gotWithVS16, tt.wantWithVS16, tt.description) } // Test with VS15 (U+FE0E) - VS15 is a no-op per Unicode TR51 // It requests text presentation but does not affect width calculation inputWithVS15 := tt.input + "\uFE0E" gotWithVS15 := String(inputWithVS15) if gotWithVS15 != tt.wantWithVS15 { t.Errorf("String(%q) with VS15 = %d, want %d (%s)", tt.input, gotWithVS15, tt.wantWithVS15, tt.description) } }) } } // TestEmojiPresentationRune tests the Rune() function specifically func TestEmojiPresentationRune(t *testing.T) { tests := []struct { name string r rune want int desc string }{ // Emoji_Presentation=Yes {name: "Watch", r: '\u231A', want: 2, desc: "⌚ has default emoji presentation"}, {name: "Alarm Clock", r: '\u23F0', want: 2, desc: "â° has default emoji presentation"}, {name: "Soccer Ball", r: '\u26BD', want: 2, desc: "âš½ has default emoji presentation"}, // Emoji_Presentation=No (but Extended_Pictographic=Yes) {name: "Star of David", r: '\u2721', want: 1, desc: "✡ has default text 
presentation"}, {name: "Hammer and Pick", r: '\u2692', want: 1, desc: "âš’ has default text presentation"}, {name: "Gear", r: '\u2699', want: 1, desc: "âš™ has default text presentation"}, {name: "Infinity", r: '\u267E', want: 1, desc: "♾ has default text presentation"}, // Not Extended_Pictographic {name: "Hash Sign", r: '#', want: 1, desc: "# is regular ASCII"}, {name: "Asterisk", r: '*', want: 1, desc: "* is regular ASCII"}, {name: "Digit Zero", r: '0', want: 1, desc: "0 is regular ASCII"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := Rune(tt.r) if got != tt.want { t.Errorf("Rune(%U) = %d, want %d (%s)", tt.r, got, tt.want, tt.desc) } }) } } // TestComplexEmojiSequences tests width of complex emoji sequences func TestComplexEmojiSequences(t *testing.T) { tests := []struct { name string input string want int desc string }{ { name: "Keycap sequence #ï¸âƒ£", input: "#\uFE0F\u20E3", want: 2, desc: "# + VS16 + combining enclosing keycap", }, { name: "Keycap sequence 0ï¸âƒ£", input: "0\uFE0F\u20E3", want: 2, desc: "0 + VS16 + combining enclosing keycap", }, { name: "Flag sequence 🇺🇸 (Regional Indicator pair)", input: "\U0001F1FA\U0001F1F8", want: 2, desc: "US flag (RI pair)", }, { name: "Single Regional Indicator 🇺", input: "\U0001F1FA", want: 2, desc: "U (RI)", }, { name: "ZWJ sequence 👨â€ðŸ‘©â€ðŸ‘§", input: "\U0001F468\u200D\U0001F469\u200D\U0001F467", want: 2, desc: "Family emoji (man + ZWJ + woman + ZWJ + girl)", }, { name: "Skin tone modifier ðŸ‘ðŸ½", input: "\U0001F44D\U0001F3FD", want: 2, desc: "Thumbs up with medium skin tone", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := String(tt.input) if got != tt.want { t.Errorf("String(%q) = %d, want %d (%s)", tt.input, got, tt.want, tt.desc) } }) } } // TestMixedContent tests width of strings with mixed emoji and text func TestMixedContent(t *testing.T) { tests := []struct { name string input string want int desc string }{ { name: "Text with emoji-presentation 
emoji", input: "Hi\u231AWorld", want: 9, // "Hi" (2) + ⌚ (2) + "World" (5) desc: "Text with watch emoji (emoji presentation)", }, { name: "Text with text-presentation emoji", input: "Hi\u2721Go", want: 5, // "Hi" (2) + ✡ (1) + "Go" (2) desc: "Text with star of David (text presentation)", }, { name: "Text with text-presentation emoji + VS16", input: "Hi\u2721\uFE0FGo", want: 6, // "Hi" (2) + âœ¡ï¸ (2) + "Go" (2) desc: "Text with star of David (forced emoji presentation)", }, { name: "Multiple emojis", input: "⌚⚽⚓", want: 6, // All three have Emoji_Presentation=Yes desc: "Watch, soccer ball, anchor", }, { name: "Mixed presentation", input: "⌚⚙⚓", want: 5, // ⌚(2) + âš™(1) + âš“(2) desc: "Watch (emoji), gear (text), anchor (emoji)", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := String(tt.input) if got != tt.want { t.Errorf("String(%q) = %d, want %d (%s)", tt.input, got, tt.want, tt.desc) } }) } } // TestTR51Conformance verifies key TR51 conformance requirements func TestTR51Conformance(t *testing.T) { t.Run("C1: Default Emoji Presentation", func(t *testing.T) { // Characters with Emoji_Presentation=Yes should display as emoji by default (width 2) emojiPresentationChars := []rune{ '\u231A', // ⌚ watch '\u231B', // ⌛ hourglass '\u23F0', // â° alarm clock '\u26BD', // âš½ soccer ball '\u2693', // âš“ anchor } for _, r := range emojiPresentationChars { got := Rune(r) if got != 2 { t.Errorf("Rune(%U) = %d, want 2 (should have default emoji presentation)", r, got) } } }) t.Run("C1: Default Text Presentation", func(t *testing.T) { // Characters with Emoji_Presentation=No should display as text by default (width 1) textPresentationChars := []rune{ '\u2721', // ✡ star of David '\u2692', // âš’ hammer and pick '\u2699', // âš™ gear '\u267E', // ♾ infinity '\u267B', // â™» recycling symbol } for _, r := range textPresentationChars { got := Rune(r) if got != 1 { t.Errorf("Rune(%U) = %d, want 1 (should have default text presentation)", r, got) } } }) 
t.Run("C2: VS15 is a no-op for width calculation", func(t *testing.T) {
		// VS15 (U+FE0E) requests text presentation but does not affect width per Unicode TR51.
		// The width should be the same as the base character.
		emojiWithVS15 := []struct {
			char string
			base string
		}{
			{"\u231A\uFE0E", "\u231A"}, // watch with VS15
			{"\u26BD\uFE0E", "\u26BD"}, // soccer ball with VS15
			{"\u2693\uFE0E", "\u2693"}, // anchor with VS15
		}
		for _, tc := range emojiWithVS15 {
			baseWidth := String(tc.base)
			vs15Width := String(tc.char)
			if vs15Width != baseWidth {
				t.Errorf("String(%q) with VS15 = %d, want %d (VS15 is a no-op, width should match base)", tc.char, vs15Width, baseWidth)
			}
		}

		// VS15 with East Asian Wide characters should preserve width 2 (no-op)
		eastAsianWideWithVS15 := []struct {
			char string
			base string
		}{
			{"\u4E2D\uFE0E", "\u4E2D"}, // CJK ideograph 中 with VS15
			{"\u65E5\uFE0E", "\u65E5"}, // CJK ideograph 日 with VS15
		}
		for _, tc := range eastAsianWideWithVS15 {
			baseWidth := String(tc.base)
			vs15Width := String(tc.char)
			if vs15Width != baseWidth {
				t.Errorf("String(%q) with VS15 = %d, want %d (VS15 is a no-op, width should match base)", tc.char, vs15Width, baseWidth)
			}
		}
	})

	t.Run("C3: VS16 forces emoji presentation", func(t *testing.T) {
		// VS16 (U+FE0F) should force emoji presentation (width 2) even for text-presentation characters
		textWithVS16 := []string{
			"\u2721\uFE0F", // star of David with VS16
			"\u2692\uFE0F", // hammer and pick with VS16
			"\u2699\uFE0F", // gear with VS16
			"\u0023\uFE0F", // hash with VS16
		}
		for _, s := range textWithVS16 {
			got := String(s)
			if got != 2 {
				t.Errorf("String(%q) with VS16 = %d, want 2 (VS16 should force emoji presentation)", s, got)
			}
		}
	})

	t.Run("ED-16: ZWJ Sequences treated as single grapheme", func(t *testing.T) {
		// ZWJ sequences should be treated as a single grapheme cluster by the grapheme tokenizer
		// and should have width 2 (since they display as a single emoji image)
		tests := []struct {
			name     string
			sequence string
			want     int
			desc     string
		}{
			{
				name:     "Family",
				sequence: "\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466",
				want:     2,
				desc:     "Family: man, woman, girl, boy (4 people + 3 ZWJ)",
			},
			{
				name:     "Kiss",
				sequence: "\U0001F469\u200D\u2764\uFE0F\u200D\U0001F48B\u200D\U0001F468",
				want:     2,
				desc:     "Kiss: woman, heart, kiss mark, man",
			},
			{
				name:     "Couple with heart",
				sequence: "\U0001F469\u200D\u2764\uFE0F\u200D\U0001F468",
				want:     2,
				desc:     "Couple with heart: woman, heart, man",
			},
			{
				name:     "Woman technologist",
				sequence: "\U0001F469\u200D\U0001F4BB",
				want:     2,
				desc:     "Woman technologist: woman, ZWJ, laptop",
			},
			{
				name:     "Rainbow flag",
				sequence: "\U0001F3F4\u200D\U0001F308",
				want:     2,
				desc:     "Rainbow flag: black flag, ZWJ, rainbow",
			},
		}
		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				got := String(tt.sequence)
				if got != tt.want {
					t.Errorf("String(%q) = %d, want %d (%s)", tt.sequence, got, tt.want, tt.desc)
					// Show the individual components for debugging
					t.Logf(" Sequence: %+q", tt.sequence)
					t.Logf(" Expected: single grapheme cluster of width %d", tt.want)
					t.Logf(" Got: %d (if > 2, grapheme tokenizer may not be recognizing ZWJ sequence)", got)
				}
			})
		}
	})

	// ED-13: Emoji Modifier Sequences
	// Per TR51: emoji_modifier_sequence := emoji_modifier_base emoji_modifier
	// These should be treated as single grapheme clusters with width 2
	t.Run("ED-13: Emoji Modifier Sequences", func(t *testing.T) {
		// Sequences are spelled as escapes (base + Fitzpatrick modifier
		// U+1F3FB..U+1F3FF) so they cannot be damaged by re-encoding.
		tests := []struct {
			sequence string
			want     int
			desc     string
		}{
			{"\U0001F44D\U0001F3FB", 2, "thumbs up + light skin tone"},
			{"\U0001F44D\U0001F3FC", 2, "thumbs up + medium-light skin tone"},
			{"\U0001F44D\U0001F3FD", 2, "thumbs up + medium skin tone"},
			{"\U0001F44D\U0001F3FE", 2, "thumbs up + medium-dark skin tone"},
			{"\U0001F44D\U0001F3FF", 2, "thumbs up + dark skin tone"},
			{"\U0001F44B\U0001F3FB", 2, "waving hand + light skin tone"},
			{"\U0001F9D1\U0001F3FD", 2, "person + medium skin tone"},
			{"\U0001F476\U0001F3FF", 2, "baby + dark skin tone"},
			{"\U0001F469\U0001F3FC", 2, "woman + medium-light skin tone"},
		}
		for _, tt := range tests {
			t.Run(tt.desc, func(t *testing.T) {
				got := String(tt.sequence)
				if got != tt.want {
					t.Errorf("String(%q) = %d, want %d (%s)", tt.sequence, got, tt.want, tt.desc)
					t.Logf(" Sequence: %+q", tt.sequence)
					t.Logf(" Expected: single grapheme cluster of width %d", tt.want)
					t.Logf(" Got: %d (if > 2, grapheme tokenizer may not be recognizing modifier sequence)", got)
				}
			})
		}
	})
}

// TestStringGraphemes checks that the widths reported while iterating
// StringGraphemes add up to the width String reports for the same input.
func TestStringGraphemes(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		options Options
	}{
		{"empty string", "", defaultOptions},
		{"single ASCII", "a", defaultOptions},
		{"multiple ASCII", "hello", defaultOptions},
		{"ASCII with spaces", "hello world", defaultOptions},
		{"ASCII with newline", "hello\nworld", defaultOptions},
		{"CJK ideograph", "中", defaultOptions},
		{"CJK with ASCII", "hello中", defaultOptions},
		{"ambiguous character", "★", defaultOptions},
		{"ambiguous character EAW", "★", eawOptions},
		{"emoji", "😀", defaultOptions},
		{"flag US", "🇺🇸", defaultOptions},
		{"text with flags", "Go 🇺🇸🚀", defaultOptions},
		{"keycap 1\uFE0F\u20E3", "1\uFE0F\u20E3", defaultOptions},
		{"mixed content", "Hi\u231A\u2699\u2693", defaultOptions},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Get expected width using String
			expected := tt.options.String(tt.input)

			// Iterate over graphemes and sum widths
			iter := tt.options.StringGraphemes(tt.input)
			got := 0
			for iter.Next() {
				got += iter.Width()
			}

			if got != expected {
				t.Errorf("StringGraphemes(%q) sum = %d, want %d (from String)", tt.input, got, expected)
			}
		})
	}
}

func TestBytesGraphemes(t *testing.T) {
	tests := []struct {
		name    string
		input   []byte
		options Options
	}{
		{"empty bytes", []byte(""), defaultOptions},
		{"single ASCII", []byte("a"), defaultOptions},
		{"multiple ASCII", []byte("hello"), defaultOptions},
		{"ASCII with spaces", []byte("hello world"), defaultOptions},
		{"ASCII with newline", []byte("hello\nworld"), defaultOptions},
		{"CJK ideograph", []byte("中"), defaultOptions},
		{"CJK with ASCII", []byte("hello中"), defaultOptions},
		{"ambiguous character", []byte("★"),
defaultOptions},
		{"ambiguous character EAW", []byte("★"), eawOptions},
		{"emoji", []byte("😀"), defaultOptions},
		{"flag US", []byte("🇺🇸"), defaultOptions},
		{"text with flags", []byte("Go 🇺🇸🚀"), defaultOptions},
		{"keycap 1\uFE0F\u20E3", []byte("1\uFE0F\u20E3"), defaultOptions},
		{"mixed content", []byte("Hi\u231A\u2699\u2693"), defaultOptions},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Get expected width using Bytes
			expected := tt.options.Bytes(tt.input)

			// Iterate over graphemes and sum widths
			iter := tt.options.BytesGraphemes(tt.input)
			got := 0
			for iter.Next() {
				got += iter.Width()
			}

			if got != expected {
				t.Errorf("BytesGraphemes(%q) sum = %d, want %d (from Bytes)", tt.input, got, expected)
			}
		})
	}
}

// TestGraphemesControlSequences checks, for each control-sequence option
// combination, that summing Width() over the grapheme iterators gives the
// same total as String / Bytes for the same input.
func TestGraphemesControlSequences(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		options Options
	}{
		// ControlSequences true: ANSI sequences are one zero-width grapheme each; visible width only
		{"ControlSequences ANSI wrapped", "\x1b[31mhello\x1b[0m", controlSequences},
		{"ControlSequences ANSI only", "\x1b[0m", controlSequences},
		{"ControlSequences plain text", "hi", controlSequences},
		{"ControlSequences ANSI mid", "a\x1b[31mb\x1b[0mc", controlSequences},

		// Default options: sum of grapheme widths must still match String/Bytes
		{"default ANSI wrapped", "\x1b[31mhello\x1b[0m", defaultOptions},
		{"default plain", "hello", defaultOptions},

		// 8-bit ControlSequences: C1 sequences are one zero-width grapheme each
		{"8-bit C1 CSI wrapped", "\x9B31mhello\x9B0m", controlSequences8Bit},
		{"8-bit C1 CSI only", "\x9B0m", controlSequences8Bit},
		{"8-bit plain text", "hi", controlSequences8Bit},
		{"8-bit C1 CSI mid", "a\x9B31mb\x9B0mc", controlSequences8Bit},

		// Both options: both 7-bit and 8-bit sequences are zero-width graphemes
		{"both: mixed", "\x1b[31mhello\x9B0m", controlSequencesBoth},
		{"both: 7-bit only input", "\x1b[31mhi\x1b[0m", controlSequencesBoth},
		{"both: 8-bit only input", "\x9B31mhi\x9B0m", controlSequencesBoth},

		// Independence: 7-bit on but 8-bit input - graphemes must still sum correctly
		{"7-bit on, 8-bit input", "\x9B31mhello\x9B0m", controlSequences},
		// Independence: 8-bit on but 7-bit input
		{"8-bit on, 7-bit input", "\x1b[31mhello\x1b[0m", controlSequences8Bit},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// StringGraphemes: option must be passed through; sum of Width() matches String()
			expected := tt.options.String(tt.input)
			iter := tt.options.StringGraphemes(tt.input)
			got := 0
			for iter.Next() {
				got += iter.Width()
			}
			if got != expected {
				t.Errorf("StringGraphemes(%q) sum Width() = %d, want %d (String)", tt.input, got, expected)
			}

			// BytesGraphemes: same option and outcome for []byte
			b := []byte(tt.input)
			expectedBytes := tt.options.Bytes(b)
			iterBytes := tt.options.BytesGraphemes(b)
			gotBytes := 0
			for iterBytes.Next() {
				gotBytes += iterBytes.Width()
			}
			if gotBytes != expectedBytes {
				t.Errorf("BytesGraphemes(%q) sum Width() = %d, want %d (Bytes)", b, gotBytes, expectedBytes)
			}
		})
	}
}

func TestAsciiWidth(t *testing.T) {
	tests := []struct {
		name     string
		b        byte
		expected int
		desc     string
	}{
		// Control characters (0x00-0x1F): width 0
		{"null", 0x00, 0, "NULL character"},
		{"bell", 0x07, 0, "BEL (bell)"},
		{"backspace", 0x08, 0, "BS (backspace)"},
		{"tab", 0x09, 0, "TAB"},
		{"newline", 0x0A, 0, "LF (newline)"},
		{"carriage return", 0x0D, 0, "CR (carriage return)"},
		{"escape", 0x1B, 0, "ESC (escape)"},
		{"last control", 0x1F, 0, "Last control character"},

		// Printable ASCII (0x20-0x7E): width 1
		{"space", 0x20, 1, "Space (first printable)"},
		{"exclamation", 0x21, 1, "!"},
		{"zero", 0x30, 1, "0"},
		{"nine", 0x39, 1, "9"},
		{"A", 0x41, 1, "A"},
		{"Z", 0x5A, 1, "Z"},
		{"a", 0x61, 1, "a"},
		{"z", 0x7A, 1, "z"},
		{"tilde", 0x7E, 1, "~ (last printable)"},

		// DEL (0x7F): width 0
		{"delete", 0x7F, 0, "DEL (delete)"},

		// >= 128: width 1 (default, though shouldn't be used for valid UTF-8)
		{"0x80", 0x80, 1, "First byte >= 128"},
		{"0xFF", 0xFF, 1, "Last byte value"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T)
{
			got := asciiWidth(tt.b)
			if got != tt.expected {
				t.Errorf("asciiWidth(0x%02X '%s') = %d, want %d (%s)", tt.b, string(tt.b), got, tt.expected, tt.desc)
			}
		})
	}
}

// TestTruncateString runs every table case through both TruncateString and
// TruncateBytes. Besides comparing against the expected output, it asserts
// that the visible width of the result never exceeds the larger of maxWidth
// and the tail's own width.
func TestTruncateString(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		maxWidth int
		tail     string
		options  Options
		expected string
	}{
		// Empty string cases
		{"empty string", "", 0, "", defaultOptions, ""},
		{"empty string with tail", "", 5, "...", defaultOptions, ""},
		{"empty string large maxWidth", "", 100, "...", defaultOptions, ""},

		// No truncation needed
		{"fits exactly", "hello", 5, "...", defaultOptions, "hello"},
		{"fits with room", "hi", 10, "...", defaultOptions, "hi"},
		{"single char fits", "a", 1, "...", defaultOptions, "a"},

		// Basic truncation - ASCII
		{"truncate ASCII", "hello world", 5, "...", defaultOptions, "he..."},
		{"truncate ASCII at start", "hello", 0, "...", defaultOptions, "..."},
		{"truncate ASCII single char", "hello", 1, "...", defaultOptions, "..."},
		{"truncate ASCII with empty tail", "hello world", 5, "", defaultOptions, "hello"},

		// Truncation with wide characters (CJK)
		{"CJK fits", "中", 2, "...", defaultOptions, "中"},
		{"CJK truncate", "中", 1, "...", defaultOptions, "..."},
		{"CJK with ASCII", "hello中", 5, "...", defaultOptions, "he..."},
		{"CJK with ASCII fits", "hello中", 7, "...", defaultOptions, "hello中"},
		{"CJK with ASCII partial", "hello中", 6, "...", defaultOptions, "hel..."},
		{"multiple CJK", "中文", 2, "...", defaultOptions, "..."},
		{"multiple CJK fits", "中文", 4, "...", defaultOptions, "中文"},

		// Truncation with emoji
		{"emoji fits", "😀", 2, "...", defaultOptions, "😀"},
		{"emoji truncate", "😀", 1, "...", defaultOptions, "..."},
		{"emoji with ASCII", "hello😀", 5, "...", defaultOptions, "he..."},
		{"emoji with ASCII fits", "hello😀", 7, "...", defaultOptions, "hello😀"},
		// NOTE(review): the second emoji was mis-encoded in this copy of the
		// file and its last byte was lost; U+1F601 is assumed here. Any
		// width-2 emoji exercises the same path - confirm against upstream.
		{"multiple emoji", "\U0001F600\U0001F601", 2, "...", defaultOptions, "..."},
		{"multiple emoji fits", "\U0001F600\U0001F601", 4, "...", defaultOptions, "\U0001F600\U0001F601"},

		// Truncation with control characters (zero width)
		// Control characters have width 0 but are preserved in the string structure
		{"with newline", "hello\nworld", 5, "...", defaultOptions, "he..."},
		{"with tab", "hello\tworld", 5, "...", defaultOptions, "he..."},
		{"newline at start", "\nhello", 5, "...", defaultOptions, "\nhello"},
		{"multiple newlines", "a\n\nb", 1, "...", defaultOptions, "..."},

		// Mixed content
		{"ASCII CJK emoji", "hi中😀", 2, "...", defaultOptions, "..."},
		{"ASCII CJK emoji fits", "hi中😀", 6, "...", defaultOptions, "hi中😀"},
		{"ASCII CJK emoji partial", "hi中😀", 4, "...", defaultOptions, "h..."},
		{"complex mixed", "Go 🇺🇸🚀", 3, "...", defaultOptions, "..."},
		{"complex mixed fits", "Go 🇺🇸🚀", 7, "...", defaultOptions, "Go 🇺🇸🚀"},

		// ControlSequences (ANSI escape sequences): truncation by visible width only.
		// When ControlSequences is true, escape sequences that appear after the
		// truncation point are preserved (appended after the tail). This prevents
		// color bleed from unclosed SGR sequences in terminal output.
		{"ControlSequences plain no truncation", "hello", 5, "...", controlSequences, "hello"},
		{"ControlSequences ANSI wrapped no truncation", "\x1b[31mhello\x1b[0m", 8, "...", controlSequences, "\x1b[31mhello\x1b[0m"},
		{"ControlSequences ANSI wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", controlSequences, "\x1b[31mh...\x1b[0m"},
		{"ControlSequences ANSI in middle truncate", "hello\x1b[31mworld", 5, "...", controlSequences, "he...\x1b[31m"},
		{"ControlSequences CJK truncate", "\x1b[31m中文\x1b[0m", 2, "...", controlSequences, "...\x1b[31m\x1b[0m"},
		{"ControlSequences CJK no truncation", "\x1b[31m中文\x1b[0m", 7, "...", controlSequences, "\x1b[31m中文\x1b[0m"},
		{"ControlSequences CJK one wide then tail", "\x1b[31m中文xx\x1b[0m", 5, "...", controlSequences, "\x1b[31m中...\x1b[0m"},
		// Stacked SGR sequences: all escape sequences after cut are preserved
		{"ControlSequences stacked SGR", "\x1b[31m\x1b[42mhello\x1b[0m", 4, "...", controlSequences, "\x1b[31m\x1b[42mh...\x1b[0m"},
		// Escape sequence between visible chars after cut: preserved
		{"ControlSequences mid-escape after cut", "\x1b[31mhello\x1b[42mworld\x1b[0m", 6, "...", controlSequences, "\x1b[31mhel...\x1b[42m\x1b[0m"},
		// No escape sequences after cut: same as before
		{"ControlSequences no trailing escape", "\x1b[31mhello", 4, "...", controlSequences, "\x1b[31mh..."},
		// Multiple colors: all trailing escapes preserved
		{"ControlSequences multi color", "a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m", 2, "...", controlSequences, "...\x1b[31m\x1b[32m\x1b[33m\x1b[0m"},

		// 8-bit ControlSequences8Bit is ignored by truncation entirely. The
		// grapheme parser is not told about 8-bit, so C1 sequence parameters
		// (e.g. "31m" after \x9B) are treated as visible characters. This is
		// intentional: 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte
		// encoding, making them unsafe to manipulate during truncation.
		{"8-bit plain no truncation", "hello", 5, "...", controlSequences8Bit, "hello"},
		{"8-bit C1 CSI wrapped truncate", "\x9B31mhello\x9B0m", 8, "...", controlSequences8Bit, "\x9B31mh..."},
		{"8-bit C1 CSI wrapped truncate narrow", "\x9B31mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."},
		{"8-bit C1 CSI in middle truncate", "hello\x9B31mworld", 5, "...", controlSequences8Bit, "he..."},
		{"8-bit C1 CSI CJK truncate", "\x9B31m中文\x9B0m", 2, "...", controlSequences8Bit, "..."},
		{"8-bit C1 CSI no trailing escape", "\x9B31mhello", 4, "...", controlSequences8Bit, "\x9B..."},
		{"8-bit C1 stacked SGR", "\x9B31m\x9B42mhello\x9B0m", 4, "...", controlSequences8Bit, "\x9B..."},

		// 7-bit only must NOT preserve trailing C1 sequences.
		// With 7-bit only, \x9B is a regular character (width 1), so the input
		// "hello\x9B0m" has visible width 8. Trailing \x9B0m is not preserved.
		{"7-bit only ignores trailing C1", "hello\x9B0m", 5, "...", controlSequences, "he..."},

		// Both enabled: only 7-bit trailing escapes are preserved; 8-bit is
		// ignored by truncation, so C1 parameters are visible characters.
		{"both: mixed trailing escapes", "\x1b[31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x1b[31mh..."},
		{"both: 7-bit wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", controlSequencesBoth, "\x1b[31mh...\x1b[0m"},
		{"both: 8-bit wrapped truncate", "\x9B31mhello\x9B0m", 4, "...", controlSequencesBoth, "\x9B..."},

		// East Asian Width option
		{"ambiguous EAW fits", "★", 2, "...", eawOptions, "★"},
		{"ambiguous EAW truncate", "★", 1, "...", eawOptions, "..."},
		{"ambiguous default fits", "★", 1, "...", defaultOptions, "★"},
		{"ambiguous mixed", "a★b", 2, "...", eawOptions, "..."},
		{"ambiguous mixed default", "a★b", 2, "...", defaultOptions, "..."},

		// Edge cases
		{"zero maxWidth", "hello", 0, "...", defaultOptions, "..."},
		{"very long string", "a very long string that will definitely be truncated", 10, "...", defaultOptions, "a very ..."},

		// Bug fix: wide char at boundary with narrow chars - ensures truncation position is correct
		// Input "中cde" (width 5), maxWidth 4, tail "ab" (width 2) -> should return "中ab" (width 4)
		{"wide char boundary bug fix", "中cde", 4, "ab", defaultOptions, "中ab"},

		// Tail variations
		{"custom tail", "hello world", 5, "…", defaultOptions, "hell…"},
		{"long tail", "hello", 3, ">>>", defaultOptions, ">>>"},
		{"tail with wide char", "hello", 3, "中", defaultOptions, "h中"},
		{"tail with emoji", "hello", 3, "😀", defaultOptions, "h😀"},

		// Grapheme boundary tests (ensuring truncation happens at grapheme boundaries)
		{"keycap sequence", "1\uFE0F\u20E32\uFE0F\u20E3", 2, "...", defaultOptions, "..."},
		{"flag sequence", "🇺🇸🇯🇵", 2, "...", defaultOptions, "..."},
		{"ZWJ sequence", "\U0001F468\u200D\U0001F469\u200D\U0001F467", 2, "...", defaultOptions, "\U0001F468\u200D\U0001F469\u200D\U0001F467"},
		{"ZWJ sequence truncate", "\U0001F468\u200D\U0001F469\u200D\U0001F467\U0001F468\u200D\U0001F469\u200D\U0001F467", 2, "...", defaultOptions, "..."},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			{
				got := tt.options.TruncateString(tt.input, tt.maxWidth, tt.tail)
				if got != tt.expected {
					t.Errorf("TruncateString(%q, %d, %q) with options %v = %q, want %q",
						tt.input, tt.maxWidth, tt.tail, tt.options, got, tt.expected)
					inputWidth := tt.options.String(tt.input)
					gotWidth := tt.options.String(got)
					t.Logf(" Input width: %d, Got width: %d, MaxWidth: %d", inputWidth, gotWidth, tt.maxWidth)
				}

				// Verify visible width respects maxWidth (or tailWidth if tail is wider)
				gotWidth := tt.options.String(got)
				limit := tt.maxWidth
				tailWidth := tt.options.String(tt.tail)
				if tailWidth > limit {
					limit = tailWidth
				}
				if gotWidth > limit {
					t.Errorf("Result visible width (%d) exceeds max(maxWidth, tailWidth) (%d)", gotWidth, limit)
				}
			}
			{
				input := []byte(tt.input)
				tail := []byte(tt.tail)
				expected := []byte(tt.expected)
				got := tt.options.TruncateBytes(input, tt.maxWidth, tail)
				if !bytes.Equal(got, expected) {
					t.Errorf("TruncateBytes(%q, %d, %q) with options %v = %q, want %q",
						input, tt.maxWidth, tail, tt.options, got, expected)
					inputWidth := tt.options.Bytes(input)
					gotWidth := tt.options.Bytes(got)
					t.Logf(" Input width: %d, Got width: %d, MaxWidth: %d", inputWidth, gotWidth, tt.maxWidth)
				}

				// Verify visible width respects maxWidth (or tailWidth if tail is wider)
				gotWidth := tt.options.Bytes(got)
				limit := tt.maxWidth
				tailWidth := tt.options.Bytes(tail)
				if tailWidth > limit {
					limit = tailWidth
				}
				if gotWidth > limit {
					t.Errorf("Result visible width (%d) exceeds max(maxWidth, tailWidth) (%d)", gotWidth, limit)
				}
			}
		})
	}
}

// TestTruncateBytesDoesNotMutateInput snapshots the input slice, calls
// TruncateBytes, and fails if the caller's bytes were changed.
func TestTruncateBytesDoesNotMutateInput(t *testing.T) {
	// Test that TruncateBytes does not mutate the caller's slice
	original := []byte("hello world")
	originalCopy := make([]byte, len(original))
	copy(originalCopy, original)

	tail := []byte("...")
	_ = TruncateBytes(original, 5, tail)

	if !bytes.Equal(original, originalCopy) {
		t.Errorf("TruncateBytes mutated the input slice: got %q, want %q", original, originalCopy)
	}
}

func TestPrintableASCIILength(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected int
		desc     string
	}{
		// Some of these tests are left over from a SWAR implementation,
		// which cared about 8 byte boundaries.
{"empty string", "", 0, "Empty string has 0 printable bytes"}, {"single char", "a", 1, "Single printable byte"}, {"single space", " ", 1, "Space is printable"}, {"7 bytes", "1234567", 7, "7 printable bytes"}, {"8 bytes", "12345678", 8, "8 printable bytes"}, {"9 bytes", "123456789", 9, "9 printable bytes"}, {"15 bytes", "123456789012345", 15, "15 printable bytes"}, {"16 bytes", "1234567890123456", 16, "16 printable bytes"}, {"17 bytes", "12345678901234567", 17, "17 printable bytes"}, {"24 bytes", "123456789012345678901234", 24, "24 printable bytes"}, {"long ASCII", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", 62, "All 62 printable bytes"}, {"all printable range", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 95, "All 95 printable ASCII chars"}, // Non-printable at start -> 0 {"control at start", "\x00hello world", 0, "Control char at start"}, {"DEL at start", "\x7Fhello world", 0, "DEL at start"}, {"non-ASCII at start", "\x80hello world", 0, "Non-ASCII at start"}, {"UTF-8 at start", "\xC2\xA0hello world", 0, "UTF-8 at start"}, {"emoji at start", "\xF0\x9F\x98\x80hello123", 0, "Emoji at start"}, // Non-printable in middle - stops before it {"control in middle", "hello\x00world123", 5, "Control at pos 5, returns 5"}, {"DEL in middle", "hello\x7Fworld123", 5, "DEL at pos 5, returns 5"}, {"control after 8", "12345678\x00world", 8, "Control at pos 8, returns 8"}, {"DEL after 8", "12345678\x7Fworld", 8, "DEL at pos 8, returns 8"}, {"control at pos 15", "123456789012345\x00", 15, "Control at pos 15, returns 15"}, // Non-ASCII at end - backs off by 1 {"non-ASCII at end of 9", "12345678\x80", 7, "Backs off 1 before non-ASCII"}, {"non-ASCII at end of 17", "1234567890123456\x80", 15, "Backs off 1 before non-ASCII"}, {"combining after 16", "1234567890123456\u0301", 15, "Backs off 1 before combining mark"}, {"non-ASCII after 1", "a\x80", 0, "Backs off 1 from 1, returns 0"}, {"16 ASCII then emoji", 
"1234567890123456\xF0\x9F\x98\x80", 15, "Backs off 1 before emoji"}, // Printable boundaries {"8 spaces", " ", 8, "Space (0x20) is first printable"}, {"8 tildes", "~~~~~~~~", 8, "Tilde (0x7E) is last printable"}, {"mixed printable", "Hello, World! 123", 17, "All 17 are printable"}, // Control characters at various positions {"control at pos 0", "\x00234567890", 0, "Control at position 0"}, {"control at pos 3", "123\x00567890", 3, "Control at position 3"}, {"control at pos 7", "1234567\x000", 7, "Control at position 7"}, // DEL at various positions {"DEL at pos 0", "\x7F234567890", 0, "DEL at position 0"}, {"DEL at pos 3", "123\x7F567890", 3, "DEL at position 3"}, {"DEL at pos 7", "1234567\x7F0", 7, "DEL at position 7"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := printableASCIILength(tt.input) if got != tt.expected { t.Errorf("printableASCIILength(%q) = %d, want %d (%s)", tt.input, got, tt.expected, tt.desc) if len(tt.input) > 0 { t.Logf(" String length: %d bytes", len(tt.input)) for i, b := range []byte(tt.input) { isPrintable := b >= 0x20 && b <= 0x7E t.Logf(" [%d]: 0x%02X printable=%v", i, b, isPrintable) } } } }) } } func TestPrintableASCIILengthBytes(t *testing.T) { tests := []struct { name string input []byte expected int desc string }{ // Any length works - returns exact count {"empty slice", []byte{}, 0, "Empty slice has 0 printable bytes"}, {"single space", []byte{0x20}, 1, "Single space"}, {"single char", []byte("a"), 1, "Single printable byte"}, {"7 bytes", []byte("1234567"), 7, "7 printable bytes"}, {"8 bytes all printable", []byte("12345678"), 8, "8 bytes all printable"}, {"9 bytes all printable", []byte("123456789"), 9, "9 printable bytes"}, {"15 bytes all printable", []byte("123456789012345"), 15, "15 printable bytes"}, {"16 bytes all printable", []byte("1234567890123456"), 16, "16 printable bytes"}, {"17 bytes all printable", []byte("12345678901234567"), 17, "17 printable bytes"}, {"24 bytes all printable", 
[]byte("123456789012345678901234"), 24, "24 printable bytes"}, {"long all printable", []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()"), 72, "72 printable bytes"}, {"all printable range", []byte(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"), 95, "All 95 printable ASCII chars"}, // Printable boundaries {"8 bytes all spaces", []byte(" "), 8, "8 spaces"}, {"8 bytes all tildes", []byte("~~~~~~~~"), 8, "8 tildes"}, {"8 bytes boundary low", []byte{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}, 8, "8 spaces (0x20)"}, {"8 bytes boundary high", []byte{0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E}, 8, "8 tildes (0x7E)"}, // Non-printable at start -> 0 {"control at start", []byte("\x00hello world"), 0, "Control char at start"}, {"DEL at start", []byte("\x7Fhello world"), 0, "DEL at start"}, {"non-ASCII at start", []byte("\x80hello world"), 0, "Non-ASCII at start"}, {"UTF-8 at start", []byte("\xC2\xA0hello world"), 0, "UTF-8 at start"}, {"emoji at start", []byte("\xF0\x9F\x98\x80hello123"), 0, "Emoji at start"}, // Non-printable in middle - stops before it {"control in middle", []byte("hel\x00o123"), 3, "Control at pos 3, returns 3"}, {"DEL in middle", []byte("hel\x7Fo123"), 3, "DEL at pos 3, returns 3"}, {"control at pos 8", []byte("12345678\x00world"), 8, "Control at pos 8, returns 8"}, {"DEL at pos 8", []byte("12345678\x7Fworld"), 8, "DEL at pos 8, returns 8"}, {"control at pos 15", []byte("123456789012345\x00"), 15, "Control at pos 15, returns 15"}, // Non-ASCII at end - backs off by 1 {"non-ASCII at end of 9", []byte("12345678\x80"), 7, "Backs off 1 before non-ASCII"}, {"non-ASCII at end of 17", []byte("1234567890123456\x80"), 15, "Backs off 1 before non-ASCII"}, {"16 ASCII then emoji", []byte("1234567890123456\xF0\x9F\x98\x80"), 15, "Backs off 1 before emoji"}, {"16 ASCII then combining acute", []byte("1234567890123456\u0301"), 15, "Backs off 1 before combining mark"}, {"16 ASCII 
then combining grave", []byte("1234567890123456\u0300"), 15, "Backs off 1 before combining mark"}, {"non-ASCII after 1", []byte("a\x80"), 0, "Backs off 1 from 1, returns 0"}, // Control characters at various positions {"control at pos 0", []byte("\x00234567890"), 0, "Control at position 0"}, {"control at pos 3", []byte("123\x00567890"), 3, "Control at position 3"}, {"control at pos 7", []byte("1234567\x000"), 7, "Control at position 7"}, // DEL at various positions {"DEL at pos 0", []byte("\x7F234567890"), 0, "DEL at position 0"}, {"DEL at pos 3", []byte("123\x7F567890"), 3, "DEL at position 3"}, {"DEL at pos 7", []byte("1234567\x7F0"), 7, "DEL at position 7"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := printableASCIILength(tt.input) if got != tt.expected { t.Errorf("printableASCIILength(%v) = %d, want %d (%s)", tt.input, got, tt.expected, tt.desc) if len(tt.input) > 0 { t.Logf(" Slice length: %d bytes", len(tt.input)) for i, b := range tt.input { isPrintable := b >= 0x20 && b <= 0x7E t.Logf(" [%d]: 0x%02X printable=%v", i, b, isPrintable) } } } }) } } // TestPrintableASCIIOptimization verifies that the partial ASCII optimization // in String() and Bytes() works correctly for printable ASCII content. 
func TestPrintableASCIIOptimization(t *testing.T) { tests := []struct { name string input string }{ {"empty", ""}, {"single char", "a"}, {"short ASCII", "hello"}, {"long ASCII", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"}, {"with spaces", "hello world"}, {"with punctuation", "Hello, World!"}, {"all printable range", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {"exactly 8 bytes", "12345678"}, {"exactly 16 bytes", "1234567890123456"}, {"exactly 24 bytes", "123456789012345678901234"}, {"7 bytes", "1234567"}, {"9 bytes", "123456789"}, {"15 bytes", "123456789012345"}, {"17 bytes", "12345678901234567"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // For printable ASCII, width should equal length width := String(tt.input) expected := len(tt.input) if width != expected { t.Errorf("String(%q) = %d, want %d", tt.input, width, expected) } // Same for Bytes widthBytes := Bytes([]byte(tt.input)) if widthBytes != expected { t.Errorf("Bytes(%q) = %d, want %d", tt.input, widthBytes, expected) } }) } } // TestUnicode16IndicConjunctBreak tests Unicode 16.0 Indic_Conjunct_Break property. // This property affects grapheme cluster breaking in Indic scripts, ensuring that // conjuncts (consonant clusters) are properly grouped into single grapheme clusters. // The Indic_Conjunct_Break property has values: Consonant, Linker, and Extend. // // Note: Indic scripts are typically width 1 (not width 2 like CJK). The key test // here is that grapheme clusters are formed correctly according to Indic_Conjunct_Break // rules, not the width value itself. 
func TestUnicode16IndicConjunctBreak(t *testing.T) { tests := []struct { name string input string expectedWidth int expectedClusters int // Expected number of grapheme clusters description string verifyClusterFormation bool // Whether to verify the cluster contains expected runes }{ // Devanagari (Hindi, Sanskrit) - Unicode range U+0900-U+097F { name: "Devanagari conjunct कà¥à¤·", input: "कà¥à¤·", // ká¹£a - क (ka) + virama + ष (á¹£a) expectedWidth: 1, // Indic scripts are width 1 expectedClusters: 1, // Should form single grapheme cluster description: "Devanagari conjunct formed with virama (U+094D) - should be single cluster", verifyClusterFormation: true, }, { name: "Devanagari conjunct तà¥à¤°", input: "तà¥à¤°", // tra - त (ta) + virama + र (ra) expectedWidth: 1, expectedClusters: 1, description: "Devanagari conjunct with र (ra) as subscript - should be single cluster", verifyClusterFormation: true, }, { name: "Devanagari conjunct जà¥à¤ž", input: "जà¥à¤ž", // jña - ज (ja) + virama + ञ (ña) expectedWidth: 1, expectedClusters: 1, description: "Devanagari conjunct जà¥à¤ž - should be single cluster", verifyClusterFormation: true, }, { name: "Devanagari word with conjuncts", input: "कà¥à¤·à¤¤à¥à¤°à¤¿à¤¯", // ká¹£atriya - contains conjunct कà¥à¤· expectedWidth: 3, // 3 grapheme clusters × 1 width each expectedClusters: 3, // कà¥à¤·, तà¥à¤°à¤¿, य description: "Devanagari word with multiple conjuncts", }, { name: "Devanagari with repha", input: "राम", // rÄma - र (ra) can form repha in some contexts expectedWidth: 2, // 2 grapheme clusters × 1 width each expectedClusters: 2, // रा, म description: "Devanagari with potential repha formation", }, // Bengali (Bangla) - Unicode range U+0980-U+09FF { name: "Bengali conjunct কà§à¦·", input: "কà§à¦·", // ká¹£a - ক (ka) + virama + ষ (á¹£a) expectedWidth: 1, expectedClusters: 1, description: "Bengali conjunct কà§à¦· - should be single cluster", verifyClusterFormation: true, }, { name: "Bengali conjunct জà§à¦ž", input: "জà§à¦ž", 
// jña - জ (ja) + virama + ঞ (ña) expectedWidth: 1, expectedClusters: 1, description: "Bengali conjunct জà§à¦ž - should be single cluster", verifyClusterFormation: true, }, { name: "Bengali word", input: "বাংলা", // bÄá¹…lÄ - Bengali expectedWidth: 2, // 2 grapheme clusters × 1 width each expectedClusters: 2, // বাং, লা description: "Bengali word with conjuncts", }, // Tamil - Unicode range U+0B80-U+0BFF // Tamil typically uses visible viramas rather than fused conjuncts // Note: Tamil may break differently - virama may form separate cluster { name: "Tamil with virama", input: "கà¯à®·", // ká¹£a - க (ka) + virama + à®· (á¹£a) expectedWidth: 2, // May break into 2 clusters: கà¯, à®· expectedClusters: 2, // Tamil virama handling may differ description: "Tamil conjunct with visible virama - may break into multiple clusters", verifyClusterFormation: false, }, { name: "Tamil word", input: "தமிழà¯", // tamiḻ - Tamil expectedWidth: 3, // 3 grapheme clusters × 1 width each expectedClusters: 3, // த, மி, ழ௠description: "Tamil word", }, // Telugu - Unicode range U+0C00-U+0C7F { name: "Telugu conjunct à°•à±à°·", input: "à°•à±à°·", // ká¹£a - à°• (ka) + virama + à°· (á¹£a) expectedWidth: 1, expectedClusters: 1, description: "Telugu conjunct à°•à±à°· - should be single cluster", verifyClusterFormation: true, }, { name: "Telugu word", input: "తెలà±à°—à±", // telugu expectedWidth: 3, // 3 grapheme clusters × 1 width each expectedClusters: 3, // తె, à°²à±, à°—à± description: "Telugu word", }, // Gujarati - Unicode range U+0A80-U+0AFF { name: "Gujarati conjunct કà«àª·", input: "કà«àª·", // ká¹£a - ક (ka) + virama + ષ (á¹£a) expectedWidth: 1, expectedClusters: 1, description: "Gujarati conjunct કà«àª· - should be single cluster", verifyClusterFormation: true, }, { name: "Gujarati word", input: "ગà«àªœàª°àª¾àª¤à«€", // gujarÄtÄ« expectedWidth: 4, // 4 grapheme clusters × 1 width each expectedClusters: 4, // ગà«, જ, રા, તી description: "Gujarati word", }, // Kannada - Unicode range 
U+0C80-U+0CFF // Note: Some Kannada conjuncts may break differently depending on Indic_Conjunct_Break implementation { name: "Kannada conjunct ಕà³à²·", input: "ಕà³à²·", // ká¹£a - ಕ (ka) + virama + ಷ (á¹£a) expectedWidth: 2, // May break into 2 clusters: ಕà³, ಷ expectedClusters: 2, // Kannada virama handling may differ description: "Kannada conjunct ಕà³à²· - may break into multiple clusters", verifyClusterFormation: false, }, { name: "Kannada word", input: "ಕನà³à²¨à²¡", // kannada expectedWidth: 4, // 4 grapheme clusters × 1 width each expectedClusters: 4, // ಕ, ನà³, ನ, ಡ description: "Kannada word", }, // Malayalam - Unicode range U+0D00-U+0D7F { name: "Malayalam conjunct à´•àµà´·", input: "à´•àµà´·", // ká¹£a - à´• (ka) + virama + à´· (á¹£a) expectedWidth: 1, expectedClusters: 1, description: "Malayalam conjunct à´•àµà´· - should be single cluster", verifyClusterFormation: true, }, { name: "Malayalam word", input: "മലയാളം", // malayÄḷaá¹ expectedWidth: 4, // 4 grapheme clusters × 1 width each expectedClusters: 4, // à´®, à´², യാ, ളം description: "Malayalam word", }, // Mixed Indic scripts { name: "Mixed Indic scripts", input: "कà¥à¤· বাংলা தமிழà¯", // Devanagari + Bengali + Tamil expectedWidth: 8, // 1 + space + 2 + space + 3 expectedClusters: 8, // कà¥à¤·, space, বাং, লা, space, த, மி, ழ௠description: "Mixed Indic scripts with spaces", }, // Test that virama (U+094D in Devanagari) doesn't break grapheme cluster { name: "Devanagari with explicit virama", input: "कà¥", // ka + virama (should be part of grapheme cluster) expectedWidth: 1, expectedClusters: 1, description: "Devanagari consonant with virama (no following consonant) - should be single cluster", verifyClusterFormation: true, }, // Test Indic script with combining marks (should still form single grapheme) { name: "Devanagari with vowel sign", input: "का", // kÄ - क (ka) + ा (Ä vowel sign) expectedWidth: 1, expectedClusters: 1, description: "Devanagari with combining vowel sign - should be single 
cluster", verifyClusterFormation: true, }, { name: "Bengali with vowel sign", input: "কা", // kÄ - ক (ka) + া (Ä vowel sign) expectedWidth: 1, expectedClusters: 1, description: "Bengali with combining vowel sign - should be single cluster", verifyClusterFormation: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Test String width got := String(tt.input) if got != tt.expectedWidth { t.Errorf("String(%q) = %d, want %d (%s)", tt.input, got, tt.expectedWidth, tt.description) } // Test Bytes width gotBytes := Bytes([]byte(tt.input)) if gotBytes != tt.expectedWidth { t.Errorf("Bytes(%q) = %d, want %d (%s)", tt.input, gotBytes, tt.expectedWidth, tt.description) } // Verify grapheme cluster formation (key test for Indic_Conjunct_Break) iter := StringGraphemes(tt.input) sumWidth := 0 clusterCount := 0 var clusters []string for iter.Next() { clusterCount++ width := iter.Width() sumWidth += width clusters = append(clusters, iter.Value()) } if clusterCount != tt.expectedClusters { t.Errorf("Number of grapheme clusters = %d, want %d (%s)", clusterCount, tt.expectedClusters, tt.description) for i, cluster := range clusters { t.Logf(" Cluster %d: %q (width %d)", i+1, cluster, String(cluster)) } } if sumWidth != tt.expectedWidth { t.Errorf("Sum of grapheme cluster widths = %d, want %d", sumWidth, tt.expectedWidth) } // For conjuncts, verify they form a single cluster (Indic_Conjunct_Break behavior) if tt.verifyClusterFormation && clusterCount != 1 { t.Errorf("Expected single grapheme cluster for conjunct, got %d clusters: %v", clusterCount, clusters) } // Verify that the input string can be reconstructed from clusters reconstructed := "" iter2 := StringGraphemes(tt.input) for iter2.Next() { reconstructed += iter2.Value() } if reconstructed != tt.input { t.Errorf("Reconstructed string from clusters = %q, want %q", reconstructed, tt.input) } }) } } func TestReproduceFuzzTruncate(t *testing.T) { // Regression test: \x1bX (ESC X = SOS) is segmented as one 
grapheme in the // full input but as two separate graphemes (\x1b + X) in the truncated // result, causing the preserved escape sequence to add visible width. text := "00000000000\x1bX\x18" options := []Options{ {EastAsianWidth: false}, {EastAsianWidth: true}, {ControlSequences: true}, {EastAsianWidth: true, ControlSequences: true}, } for _, opt := range options { ts := opt.TruncateString(text, 10, "...") w := opt.String(ts) if w > 10 { t.Errorf("TruncateString() returned string longer than maxWidth for %q with opts %+v: %q (width %d)", text, opt, ts, w) } tb := opt.TruncateBytes([]byte(text), 10, []byte("...")) if !bytes.Equal(tb, []byte(ts)) { t.Errorf("TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", text, opt, tb, ts) } } } func TestTruncateIgnores8Bit(t *testing.T) { // Truncation ignores ControlSequences8Bit entirely (see GoDoc). // This means the truncation result, when measured with 8-bit-aware // String(), may exceed maxWidth. This is the documented tradeoff: // 8-bit C1 bytes (0x80-0x9F) overlap with UTF-8 multi-byte encoding, // so manipulating them during truncation is unsafe. // // These tests verify that truncation is self-consistent: the result // measured WITHOUT 8-bit should respect maxWidth. cases := []struct { name string text string }{ { // Byte recombination: the grapheme parser with 8-bit groups // \x9f\xcf as one escape (APC + payload). Without 8-bit, \xcf // and \x90 can recombine into U+03D0 (Ï, width 1). name: "byte recombination", text: "000000000000000000000\x9f\xcf\x1a\x90", }, { // SOS terminator mismatch: with 8-bit, \x9c is ST (terminates // the 7-bit SOS started by \x1bX). Without 8-bit, \x9c is not // recognized as ST, so SOS consumes more of the string. 
name: "SOS terminator mismatch", text: "00\x98\x1bX\x9c0000000000\x18", }, } options := []Options{ {ControlSequences8Bit: true}, {ControlSequences: true, ControlSequences8Bit: true}, {EastAsianWidth: true, ControlSequences8Bit: true}, } for _, tc := range cases { for _, opt := range options { // Truncation ignores 8-bit, so measure with the same view measureOpt := opt measureOpt.ControlSequences8Bit = false ts := opt.TruncateString(tc.text, 10, "...") w := measureOpt.String(ts) if w > 10 { t.Errorf("%s: TruncateString() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", tc.name, w, tc.text, opt, ts) } tb := opt.TruncateBytes([]byte(tc.text), 10, []byte("...")) bw := measureOpt.Bytes(tb) if bw > 10 { t.Errorf("%s: TruncateBytes() width %d > 10 (measured without 8-bit) for %q with opts %+v: %q", tc.name, bw, tc.text, opt, tb) } if !bytes.Equal(tb, []byte(ts)) { t.Errorf("%s: TruncateBytes() != TruncateString() for %q with opts %+v: %q != %q", tc.name, tc.text, opt, tb, ts) } } } }