././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1770549336.7094433 jaconv-0.5.0/0000755000076500000240000000000015142070131012227 5ustar00yukinostaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549264.0 jaconv-0.5.0/CHANGES.rst0000644000076500000240000000434615142070020014035 0ustar00yukinostaffCHANGES ======= 0.5.0 (2026-02-08) ------------------- - Add new func. enlarge_smallkana. - The alphabet2kana func. converts "si" to "し". - Fix conversion bugs in alphabet2kana and kana2alphabet func. - Fix bugs about small-kana in kana2alphabet func. - Add docstring to alias functions 0.4.1 (2025-11-30) ------------------- - port static configs to setup.cfg (thanks @eli-schwartz) - migrate testsuite to pytest (thanks @eli-schwartz) - Support Python 3.13 and 3.14 0.4.0 (2024-07-26) ------------------- - Support Python 3.12 - Add stub files according to PEP 561 for mypy (thanks @ernix) 0.3.4 (2023-02-18) ------------------- - Fix to support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Support Python 3.11 0.3.3 (2022-12-31) ------------------- - Support Python 3.10 - Re-support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Fix z2h, h2z all flag off bug (thanks @manjuu-eater) 0.3.1 (2022-12-14) ------------------- - Fix alpha2kana infinite loop bug (thanks @frog42) 0.3 (2021-03-29) ------------------- - Fix bug (alphabet2kana) thanks @Cuddlemuffin007 - Support Python 3.8 and 3.9 - Add handy functions: alphabet2kata and kata2alphabet. thanks @kokimame - Add function for julius: hiragana2julius 0.2.4 (2018-02-04) ------------------- - Fix bug (kana2alphabet) - Support Python 3.7 - No longer support Python 2.6 - Add aliases of z2h -> zenkaku2hankaku and h2z -> hankaku2zenkaku 0.2.3 (2018-02-03) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) thanks @letuananh 0.2.2 (2018-01-22) ------------------- - Fix bug (kana2alphabet) thanks @kokimame - Support Python 3.6 0.2.1 (2017-09-14) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) 0.2 (2015-04-02) ------------------ - Change module name jctconv -> jaconv - Add alphabet and hiragana interconvert (alphabet2kana, kana2alphabet) 0.1.1 (2015-03-12) ------------------ - Support Windows - Support Python 3.5 0.1 (2014-11-24) ------------------ - Add some Japanese characters to convert table (ゝゞ・「」。、) - Decresing memory usage - Some function names are deprecated (hankaku2zenkaku, zenkaku2hankaku, H2K, H2hK, K2H) 0.0.7 (2014-03-22) ------------------ z2h and h2z allow mojimoji-like target character type determination. Bug fix about Half Kana conversion. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1671018072.0 jaconv-0.5.0/LICENSE0000644000076500000240000000207114346333130013242 0ustar00yukinostaffThe MIT License (MIT) Copyright (c) 2014 Yukino Ikegami Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1764453817.0 jaconv-0.5.0/MANIFEST.in0000644000076500000240000000006515112666671014007 0ustar00yukinostaffinclude *.rst include LICENSE include test_jaconv.py ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1770549336.7093208 jaconv-0.5.0/PKG-INFO0000644000076500000240000002124415142070131013327 0ustar00yukinostaffMetadata-Version: 2.4 Name: jaconv Version: 0.5.0 Summary: Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, Zenkaku and more Home-page: https://github.com/ikegami-yukino/jaconv Author: Yukino Ikegami Author-email: yknikgm@gmail.com License: MIT License Keywords: Japanese converter,Japanese,text preprocessing,half-width kana,Hiragana,Katakana,Hankaku,Zenkaku,transliteration,Julius Platform: POSIX Platform: Windows Platform: Unix Platform: MacOS Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Information Technology Classifier: Natural Language :: Japanese Classifier: Operating System :: MacOS Classifier: Operating System :: Microsoft Classifier: Operating System :: POSIX Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3.14 Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General License-File: LICENSE Dynamic: description Dynamic: license-file Dynamic: summary jaconv ========== |version| |pyversion| |license| |download| |usedby| |githubstars| |nowar| |nonuke| jaconv (Japanese Converter) is interconverter for Hiragana, Katakana, Hankaku (half-width character) and Zenkaku (full-width character) `Japanese README `_ is available. INSTALLATION ============== :: $ pip install jaconv USAGE ============ See also `document `_ .. code:: python import jaconv # Hiragana to Katakana jaconv.hira2kata('ともえまみ') # => 'トモエマミ' # Hiragana to half-width Katakana jaconv.hira2hkata('ともえまみ') # => 'トモエマミ' # Katakana to Hiragana jaconv.kata2hira('巴マミ') # => '巴まみ' # half-width character to full-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.h2z('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # half-width character to full-width character # but only ascii characters jaconv.h2z('abc', kana=False, ascii=True, digit=False) # => 'abc' # half-width character to full-width character # but only digit characters jaconv.h2z('123', kana=False, ascii=False, digit=True) # => '123' # half-width character to full-width character # except half-width Katakana jaconv.h2z('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of h2z jaconv.hankaku2zenkaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # full-width character to half-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.z2h('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # full-width character to half-width character # but only ascii characters jaconv.z2h('abc', kana=False, ascii=True, digit=False) # => 'abc' # full-width character to half-width character # but only digit characters jaconv.z2h('123', kana=False, ascii=False, digit=True) # => '123' # full-width character to half-width character # except full-width Katakana jaconv.z2h('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of z2h jaconv.zenkaku2hankaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # normalize jaconv.normalize('ティロ・フィナ〜レ', 'NFKC') # => 'ティロ・フィナーレ' # Convert small Hiragana or Katakana to normal size jaconv.enlarge_smallkana('わぁい') # => 'わあい' jaconv.enlarge_smallkana('きょういっぱい', ignore='っ') # => 'きよういっぱい' # Hiragana to alphabet jaconv.kana2alphabet('じゃぱん') # => 'japan' # Alphabet to Hiragana jaconv.alphabet2kana('japan') # => 'じゃぱん' # Katakana to Alphabet jaconv.kata2alphabet('ケツイ') # => 'ketsui' # Alphabet to Katakana jaconv.alphabet2kata('namba') # => 'ナンバ' # Hiragana to Julius's phoneme format jaconv.hiragana2julius('てんきすごくいいいいいい') # => 't e N k i s u g o k u i:' NOTE ============ jaconv.normalize method expand unicodedata.normalize for Japanese language processing. .. code:: '〜' => 'ー' '~' => 'ー' "’" => "'" '”'=> '"' '“' => '``' '―' => '-' '‐' => '-' '˗' => '-' '֊' => '-' '‐' => '-' '‑' => '-' '‒' => '-' '–' => '-' '⁃' => '-' '⁻' => '-' '₋' => '-' '−' => '-' '﹣' => 'ー' '-' => 'ー' '—' => 'ー' '―' => 'ー' '━' => 'ー' '─' => 'ー' .. |pyversion| image:: https://img.shields.io/pypi/pyversions/jaconv.svg .. |version| image:: https://img.shields.io/pypi/v/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: latest version .. |license| image:: https://img.shields.io/pypi/l/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: license .. |download| image:: https://static.pepy.tech/personalized-badge/jaconv?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads :target: https://pepy.tech/project/jaconv :alt: download .. |usedby| image:: https://img.shields.io/github/search?query=import%20jaconv%20language%3Apython&label=Used%20in%20GitHub :target: https://github.com/search?q=import+jaconv+language%3Apython&type=code :alt: GitHub code search count .. |githubstars| image:: https://img.shields.io/github/stars/ikegami-yukino/jaconv :target: https://github.com/ikegami-yukino/jaconv :alt: GitHub Repo stars .. |nowar| image:: https://img.shields.io/badge/%F0%9F%A4%9D%20NO%20WAR-FF0000?style=plastic :alt: NO WAR budge .. |nonuke| image:: https://img.shields.io/badge/%E2%98%A2%20NO%20NUKE-FFFF00?style=plastic :alt: NO NUKE budge CHANGES ======= 0.5.0 (2026-02-08) ------------------- - Add new func. enlarge_smallkana. - The alphabet2kana func. converts "si" to "し". - Fix conversion bugs in alphabet2kana and kana2alphabet func. - Fix bugs about small-kana in kana2alphabet func. - Add docstring to alias functions 0.4.1 (2025-11-30) ------------------- - port static configs to setup.cfg (thanks @eli-schwartz) - migrate testsuite to pytest (thanks @eli-schwartz) - Support Python 3.13 and 3.14 0.4.0 (2024-07-26) ------------------- - Support Python 3.12 - Add stub files according to PEP 561 for mypy (thanks @ernix) 0.3.4 (2023-02-18) ------------------- - Fix to support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Support Python 3.11 0.3.3 (2022-12-31) ------------------- - Support Python 3.10 - Re-support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Fix z2h, h2z all flag off bug (thanks @manjuu-eater) 0.3.1 (2022-12-14) ------------------- - Fix alpha2kana infinite loop bug (thanks @frog42) 0.3 (2021-03-29) ------------------- - Fix bug (alphabet2kana) thanks @Cuddlemuffin007 - Support Python 3.8 and 3.9 - Add handy functions: alphabet2kata and kata2alphabet. thanks @kokimame - Add function for julius: hiragana2julius 0.2.4 (2018-02-04) ------------------- - Fix bug (kana2alphabet) - Support Python 3.7 - No longer support Python 2.6 - Add aliases of z2h -> zenkaku2hankaku and h2z -> hankaku2zenkaku 0.2.3 (2018-02-03) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) thanks @letuananh 0.2.2 (2018-01-22) ------------------- - Fix bug (kana2alphabet) thanks @kokimame - Support Python 3.6 0.2.1 (2017-09-14) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) 0.2 (2015-04-02) ------------------ - Change module name jctconv -> jaconv - Add alphabet and hiragana interconvert (alphabet2kana, kana2alphabet) 0.1.1 (2015-03-12) ------------------ - Support Windows - Support Python 3.5 0.1 (2014-11-24) ------------------ - Add some Japanese characters to convert table (ゝゞ・「」。、) - Decresing memory usage - Some function names are deprecated (hankaku2zenkaku, zenkaku2hankaku, H2K, H2hK, K2H) 0.0.7 (2014-03-22) ------------------ z2h and h2z allow mojimoji-like target character type determination. Bug fix about Half Kana conversion. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770546192.0 jaconv-0.5.0/README.rst0000644000076500000240000001155615142062020013724 0ustar00yukinostaffjaconv ========== |version| |pyversion| |license| |download| |usedby| |githubstars| |nowar| |nonuke| jaconv (Japanese Converter) is interconverter for Hiragana, Katakana, Hankaku (half-width character) and Zenkaku (full-width character) `Japanese README `_ is available. INSTALLATION ============== :: $ pip install jaconv USAGE ============ See also `document `_ .. code:: python import jaconv # Hiragana to Katakana jaconv.hira2kata('ともえまみ') # => 'トモエマミ' # Hiragana to half-width Katakana jaconv.hira2hkata('ともえまみ') # => 'トモエマミ' # Katakana to Hiragana jaconv.kata2hira('巴マミ') # => '巴まみ' # half-width character to full-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.h2z('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # half-width character to full-width character # but only ascii characters jaconv.h2z('abc', kana=False, ascii=True, digit=False) # => 'abc' # half-width character to full-width character # but only digit characters jaconv.h2z('123', kana=False, ascii=False, digit=True) # => '123' # half-width character to full-width character # except half-width Katakana jaconv.h2z('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of h2z jaconv.hankaku2zenkaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # full-width character to half-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.z2h('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # full-width character to half-width character # but only ascii characters jaconv.z2h('abc', kana=False, ascii=True, digit=False) # => 'abc' # full-width character to half-width character # but only digit characters jaconv.z2h('123', kana=False, ascii=False, digit=True) # => '123' # full-width character to half-width character # except full-width Katakana jaconv.z2h('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of z2h jaconv.zenkaku2hankaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # normalize jaconv.normalize('ティロ・フィナ〜レ', 'NFKC') # => 'ティロ・フィナーレ' # Convert small Hiragana or Katakana to normal size jaconv.enlarge_smallkana('わぁい') # => 'わあい' jaconv.enlarge_smallkana('きょういっぱい', ignore='っ') # => 'きよういっぱい' # Hiragana to alphabet jaconv.kana2alphabet('じゃぱん') # => 'japan' # Alphabet to Hiragana jaconv.alphabet2kana('japan') # => 'じゃぱん' # Katakana to Alphabet jaconv.kata2alphabet('ケツイ') # => 'ketsui' # Alphabet to Katakana jaconv.alphabet2kata('namba') # => 'ナンバ' # Hiragana to Julius's phoneme format jaconv.hiragana2julius('てんきすごくいいいいいい') # => 't e N k i s u g o k u i:' NOTE ============ jaconv.normalize method expand unicodedata.normalize for Japanese language processing. .. code:: '〜' => 'ー' '~' => 'ー' "’" => "'" '”'=> '"' '“' => '``' '―' => '-' '‐' => '-' '˗' => '-' '֊' => '-' '‐' => '-' '‑' => '-' '‒' => '-' '–' => '-' '⁃' => '-' '⁻' => '-' '₋' => '-' '−' => '-' '﹣' => 'ー' '-' => 'ー' '—' => 'ー' '―' => 'ー' '━' => 'ー' '─' => 'ー' .. |pyversion| image:: https://img.shields.io/pypi/pyversions/jaconv.svg .. |version| image:: https://img.shields.io/pypi/v/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: latest version .. |license| image:: https://img.shields.io/pypi/l/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: license .. |download| image:: https://static.pepy.tech/personalized-badge/jaconv?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads :target: https://pepy.tech/project/jaconv :alt: download .. |usedby| image:: https://img.shields.io/github/search?query=import%20jaconv%20language%3Apython&label=Used%20in%20GitHub :target: https://github.com/search?q=import+jaconv+language%3Apython&type=code :alt: GitHub code search count .. |githubstars| image:: https://img.shields.io/github/stars/ikegami-yukino/jaconv :target: https://github.com/ikegami-yukino/jaconv :alt: GitHub Repo stars .. |nowar| image:: https://img.shields.io/badge/%F0%9F%A4%9D%20NO%20WAR-FF0000?style=plastic :alt: NO WAR budge .. |nonuke| image:: https://img.shields.io/badge/%E2%98%A2%20NO%20NUKE-FFFF00?style=plastic :alt: NO NUKE budge ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770546192.0 jaconv-0.5.0/README_JP.rst0000644000076500000240000001125115142062020014305 0ustar00yukinostaffjaconv ========== |version| |pyversion| |license| |download| |usedby| |githubstars| |nowar| |nonuke| jaconv (Japanese Converter) はひらがな・カタカナ・全角・半角の文字種変換を高速に行います。 Pythonのみで実装されているので、Cコンパイラが使えない環境でも利用できます。 INSTALLATION ============== :: $ pip install jaconv USAGE ============ .. code:: python import jaconv # ひらがな to カタカナ jaconv.hira2kata('ともえまみ') # => 'トモエマミ' # ひらがな to 半角カタカナ jaconv.hira2hkata('ともえまみ') # => 'トモエマミ' # カタカナ to ひらがな jaconv.kata2hira('巴マミ') # => '巴まみ' # 半角かな文字 to 全角かな文字 jaconv.h2z('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # 半角ASCII to 全角ASCII jaconv.h2z('abc', kana=False, ascii=True, digit=False) # => 'abc' # 数字以外の半角文字 to 全角文字 jaconv.h2z('123', kana=False, ascii=False, digit=True) # => '123' # カタカナ以外の半角文字 to 全角文字 jaconv.h2z('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # h2zのエイリアス jaconv.hankaku2zenkaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # 全角かな文字 to 半角かな文字 jaconv.z2h('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # 全角ASCII to 半角ASCII jaconv.z2h('abc', kana=False, ascii=True, digit=False) # => 'abc' # 全角アラビア数字 to 半角アラビア数字 jaconv.z2h('123', kana=False, ascii=False, digit=True) # => '123' # カタカナ以外の全角文字 to 半角文字 jaconv.z2h('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # z2hのエイリアス jaconv.zenkaku2hankaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # normalize jaconv.normalize('ティロ・フィナ〜レ', 'NFKC') # => 'ティロ・フィナーレ' # 小文字のひらがなやカタカナを大文字のかなに変換 jaconv.enlarge_smallkana('わぁい') # => 'わあい' jaconv.enlarge_smallkana('きょういっぱい', ignore='っ') # => 'きよういっぱい' # ひらがな to アルファベット jaconv.kana2alphabet('じゃぱん') # => japan # アルファベット to ひらがな jaconv.alphabet2kana('japan') # => じゃぱん # カタカナ to アルファベット jaconv.kata2alphabet('ケツイ') # => 'ketsui' # アルファベット to カタカナ jaconv.alphabet2kata('namba') # => 'ナンバ' # ひらがな to Juliusの音素表現 jaconv.hiragana2julius('てんきすごくいいいいいい') # => 't e N k i s u g o k u i:' NOTE ============ normalize メソッドは、Python の unicodedata.normalize をベースに日本語処理向けに特化した拡張を行っています。 具体的には以下の変換処理を追加して実行します。 .. code:: '〜' => 'ー' '~' => 'ー' "’" => "'" '”'=> '"' '“' => '``' '―' => '-' '‐' => '-' '˗' => '-' '֊' => '-' '‐' => '-' '‑' => '-' '‒' => '-' '–' => '-' '⁃' => '-' '⁻' => '-' '₋' => '-' '−' => '-' '﹣' => 'ー' '-' => 'ー' '—' => 'ー' '―' => 'ー' '━' => 'ー' '─' => 'ー' .. |pyversion| image:: https://img.shields.io/pypi/pyversions/jaconv.svg .. |version| image:: https://img.shields.io/pypi/v/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: latest version .. |license| image:: https://img.shields.io/pypi/l/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: license .. |download| image:: https://static.pepy.tech/personalized-badge/jaconv?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads :target: https://pepy.tech/project/jaconv :alt: download .. |usedby| image:: https://img.shields.io/github/search?query=import%20jaconv%20language%3Apython&label=Used%20in%20GitHub :target: https://github.com/search?q=import+jaconv+language%3Apython&type=code :alt: GitHub code search count .. |githubstars| image:: https://img.shields.io/github/stars/ikegami-yukino/jaconv :target: https://github.com/ikegami-yukino/jaconv :alt: GitHub Repo stars .. |nowar| image:: https://img.shields.io/badge/%F0%9F%A4%9D%20NO%20WAR-FF0000?style=plastic :alt: NO WAR budge .. |nonuke| image:: https://img.shields.io/badge/%E2%98%A2%20NO%20NUKE-FFFF00?style=plastic :alt: NO NUKE budge ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1770549336.7079184 jaconv-0.5.0/jaconv/0000755000076500000240000000000015142070131013507 5ustar00yukinostaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549264.0 jaconv-0.5.0/jaconv/__init__.py0000644000076500000240000000514515142070020015622 0ustar00yukinostaff# -*- coding: utf-8 -*- from __future__ import unicode_literals from . import jaconv """jaconv This module provides Japanese and ASCII character interconverting between Hiragana and full-/half-width Katakana/ASCII characters. Author: Yukino Ikegami Lisence: MIT License Usage: import jaconv jaconv.hira2kata(text, [ignore]) # ひらがなを全角カタカナに変換 jaconv.hira2hkata(text, [ignore]) # ひらがなを半角カタカナに変換 jaconv.kata2hira(text, [ignore]) # 全角カタカナをひらがなに変換 jaconv.enlarge_smallkana(text, [ignore]) # 小文字かなを大文字かなに変換 jaconv.h2z(text, [ignore, kana, ascii, digit]) # 半角文字を全角文字に変換 jaconv.z2h(text, [ignore, kana, ascii, digit]) # 全角文字を半角文字に変換 jaconv.han2zen(text, [ignore, kana, ascii, digit]) # 半角文字を全角文字に変換 jaconv.zen2han(text, [ignore, kana, ascii, digit]) # 全角文字を半角文字に変換 jaconv.normalize(text, [nomalizemode]) # 半角カナを全角カナへ、全角英数字を半角英数字に変換 jaconv.kana2alphabet(text) # かなをローマ字入力アルファベットに変換 jaconv.alphabet2kana(text) # ローマ字入力アルファベットをかなに変換 jaconv.kata2alphabet(text) # カタカナをローマ字入力アルファベットに変換 jaconv.alphabet2kata(text) # ローマ字入力アルファベットをカタカナに変換 jaconv.hiragana2julius(text) # ひらがなをJuliusの音素表現に変換 """ VERSION = (0, 5, 0) __version__ = '0.5.0' __all__ = [ 'hira2kata', 'hira2hkata', 'kata2hira', 'h2z', 'z2h', 'hankaku2zenkaku', 'zenkaku2hankaku', 'han2zen', 'zen2han', 'normalize', 'kana2alphabet', 'alphabet2kana', 'kata2alphabet', 'alphabet2kata', 'hiragana2julius', 'enlarge_smallkana', 'enlargesmallkana', ] hira2kata = jaconv.hira2kata hira2hkata = jaconv.hira2hkata kata2hira = jaconv.kata2hira h2z = jaconv.h2z z2h = jaconv.z2h han2zen = jaconv.han2zen # an alias of h2z zen2han = jaconv.zen2han # an alias of z2h hankaku2zenkaku = jaconv.hankaku2zenkaku # an alias of h2z zenkaku2hankaku = jaconv.zenkaku2hankaku # an alias of z2h normalize = jaconv.normalize kana2alphabet = jaconv.kana2alphabet alphabet2kana = jaconv.alphabet2kana kata2alphabet = jaconv.kata2alphabet alphabet2kata = jaconv.alphabet2kata hiragana2julius = jaconv.hiragana2julius enlargesmallkana = jaconv.enlargesmallkana # an alias of enlarge_smallkana enlarge_smallkana = jaconv.enlarge_smallkana ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/__init__.pyi0000644000076500000240000000167315142065620016007 0ustar00yukinostafffrom _typeshed import Incomplete from . import jaconv __all__ = [ 'hira2kata', 'hira2hkata', 'kata2hira', 'h2z', 'z2h', 'hankaku2zenkaku', 'zenkaku2hankaku', 'han2zen', 'zen2han', 'normalize', 'kana2alphabet', 'alphabet2kana', 'kata2alphabet', 'alphabet2kata', 'hiragana2julius', 'enlargesmallkana', 'enlarge_smallkana', ] hira2kata = jaconv.hira2kata hira2hkata = jaconv.hira2hkata kata2hira = jaconv.kata2hira h2z = jaconv.h2z z2h = jaconv.z2h han2zen = jaconv.han2zen zen2han = jaconv.zen2han hankaku2zenkaku = jaconv.hankaku2zenkaku zenkaku2hankaku = jaconv.zenkaku2hankaku normalize = jaconv.normalize kana2alphabet = jaconv.kana2alphabet alphabet2kana = jaconv.alphabet2kana kata2alphabet = jaconv.kata2alphabet alphabet2kata = jaconv.alphabet2kata hiragana2julius = jaconv.hiragana2julius enlargesmallkana = jaconv.enlargesmallkana enlarge_smallkana = jaconv.enlarge_smallkana ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/compat.py0000644000076500000240000000036115142065620015353 0ustar00yukinostaff# -*- coding: utf-8 -*- from sys import version_info if version_info < (3,): from itertools import imap, izip map = imap zip = izip from codecs import open open = open else: map = map zip = zip open = open ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/compat.pyi0000644000076500000240000000034715142065620015530 0ustar00yukinostafffrom sys import version_info if version_info < (3,): from itertools import imap, izip # type: ignore[attr-defined] map = imap zip = izip from codecs import open open = open else: map = map zip = zip ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/conv_table.py0000644000076500000240000001304315142065620016205 0ustar00yukinostaff# -*- coding: utf-8 -*- from __future__ import unicode_literals import re from .compat import map, zip HIRAGANA = list( 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすず' 'せぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴ' 'ふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわ' 'をんーゎゐゑゕゖゔゝゞ・「」。、' ) HALF_ASCII = list( '!"#$%&\'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ' '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ' ) HALF_DIGIT = list('0123456789') HALF_KANA_SEION = list( 'ァアィイゥウェエォオカキクケコサシスセソタチッツテトナニヌネノハヒフヘホマミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヽヾ・「」。、' ) HALF_KANA = [ 'ァ', 'ア', 'ィ', 'イ', 'ゥ', 'ウ', 'ェ', 'エ', 'ォ', 'オ', 'カ', 'ガ', 'キ', 'ギ', 'ク', 'グ', 'ケ', 'ゲ', 'コ', 'ゴ', 'サ', 'ザ', 'シ', 'ジ', 'ス', 'ズ', 'セ', 'ゼ', 'ソ', 'ゾ', 'タ', 'ダ', 'チ', 'ヂ', 'ッ', 'ツ', 'ヅ', 'テ', 'デ', 'ト', 'ド', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'バ', 'パ', 'ヒ', 'ビ', 'ピ', 'フ', 'ブ', 'プ', 'ヘ', 'ベ', 'ペ', 'ホ', 'ボ', 'ポ', 'マ', 'ミ', 'ム', 'メ', 'モ', 'ャ', 'ヤ', 'ュ', 'ユ', 'ョ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ワ', 'ヲ', 'ン', 'ー', 'ヮ', 'ヰ', 'ヱ', 'ヵ', 'ヶ', 'ヴ', 'ヽ', 'ヾ', '・', '「', '」', '。', '、', ] FULL_ASCII = list( '!"#$%&'()*+,-./:;<=>?@' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '[\]^_`abcdefghijklmnopqrst' 'uvwxyz{|}~ ' ) FULL_DIGIT = list('0123456789') FULL_KANA = list( 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソ' 'ゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペ' 'ホボポマミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴ' 'ヽヾ・「」。、' ) FULL_KANA_SEION = list( 'ァアィイゥウェエォオカキクケコサシスセソタチッツテト' 'ナニヌネノハヒフヘホマミムメモャヤュユョヨラリルレロ' 'ワヲンーヮヰヱヵヶヽヾ・「」。、' ) HEPBURN = list('aiueoaiueonl~-') HEPBURN_KANA = list('ぁぃぅぇぉあいうえおんる〜ー') SMALL_KANA = list('ぁぃぅぇぉゃゅょっァィゥェォヵヶャュョッ') SMALL_KANA_NORMALIZED = list('あいうえおやゆよつアイウエオカケヤユヨツ') def _to_ord_list(chars): return list(map(ord, chars)) HIRAGANA_ORD = _to_ord_list(HIRAGANA) FULL_KANA_ORD = _to_ord_list(FULL_KANA) HALF_ASCII_ORD = _to_ord_list(HALF_ASCII) FULL_ASCII_ORD = _to_ord_list(FULL_ASCII) HALF_DIGIT_ORD = _to_ord_list(HALF_DIGIT) FULL_DIGIT_ORD = _to_ord_list(FULL_DIGIT) HALF_KANA_SEION_ORD = _to_ord_list(HALF_KANA_SEION) FULL_KANA_SEION_ORD = _to_ord_list(FULL_KANA_SEION) SMALL_KANA_ORD = _to_ord_list(SMALL_KANA) def _to_dict(_from, _to): return dict(zip(_from, _to)) H2K_TABLE = _to_dict(HIRAGANA_ORD, FULL_KANA) H2HK_TABLE = _to_dict(HIRAGANA_ORD, HALF_KANA) K2H_TABLE = _to_dict(FULL_KANA_ORD, HIRAGANA) H2Z_A = _to_dict(HALF_ASCII_ORD, FULL_ASCII) H2Z_AD = _to_dict(HALF_ASCII_ORD + HALF_DIGIT_ORD, FULL_ASCII + FULL_DIGIT) H2Z_AK = _to_dict(HALF_ASCII_ORD + HALF_KANA_SEION_ORD, FULL_ASCII + FULL_KANA_SEION) H2Z_D = _to_dict(HALF_DIGIT_ORD, FULL_DIGIT) H2Z_K = _to_dict(HALF_KANA_SEION_ORD, FULL_KANA_SEION) H2Z_DK = _to_dict(HALF_DIGIT_ORD + HALF_KANA_SEION_ORD, FULL_DIGIT + FULL_KANA_SEION) H2Z_ALL = _to_dict( HALF_ASCII_ORD + HALF_DIGIT_ORD + HALF_KANA_SEION_ORD, FULL_ASCII + FULL_DIGIT + FULL_KANA_SEION, ) Z2H_A = _to_dict(FULL_ASCII_ORD, HALF_ASCII) Z2H_AD = _to_dict(FULL_ASCII_ORD + FULL_DIGIT_ORD, HALF_ASCII + HALF_DIGIT) Z2H_AK = _to_dict(FULL_ASCII_ORD + FULL_KANA_ORD, HALF_ASCII + HALF_KANA) Z2H_D = _to_dict(FULL_DIGIT_ORD, HALF_DIGIT) Z2H_K = _to_dict(FULL_KANA_ORD, HALF_KANA) Z2H_DK = _to_dict(FULL_DIGIT_ORD + FULL_KANA_ORD, HALF_DIGIT + HALF_KANA) Z2H_ALL = _to_dict( FULL_ASCII_ORD + FULL_DIGIT_ORD + FULL_KANA_ORD, HALF_ASCII + HALF_DIGIT + HALF_KANA ) KANA2HEP = _to_dict(_to_ord_list(HEPBURN_KANA), HEPBURN) HEP2KANA = _to_dict(_to_ord_list(HEPBURN), HEPBURN_KANA) JULIUS_LONG_VOWEL = tuple( ( (re.compile('( a){2,}'), ' a:'), (re.compile('( i){2,}'), ' i:'), (re.compile('( u){2,}'), ' u:'), (re.compile('( e){2,}'), ' e:'), (re.compile('( o){2,}'), ' o:'), ) ) SMALL_KANA2NORMAL_KANA = _to_dict(SMALL_KANA_ORD, SMALL_KANA_NORMALIZED) del _to_ord_list del _to_dict del HIRAGANA_ORD del HIRAGANA del HALF_KANA del FULL_KANA_ORD del FULL_KANA del HALF_ASCII_ORD del HALF_ASCII del FULL_ASCII_ORD del FULL_ASCII del HALF_DIGIT_ORD del HALF_DIGIT del FULL_DIGIT_ORD del FULL_DIGIT del HALF_KANA_SEION_ORD del HALF_KANA_SEION del FULL_KANA_SEION_ORD del FULL_KANA_SEION del HEPBURN del HEPBURN_KANA del SMALL_KANA del SMALL_KANA_ORD del SMALL_KANA_NORMALIZED ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770546192.0 jaconv-0.5.0/jaconv/conv_table.pyi0000644000076500000240000000102515142062020016342 0ustar00yukinostafffrom _typeshed import Incomplete from .compat import map as map from .compat import zip as zip H2K_TABLE: Incomplete H2HK_TABLE: Incomplete K2H_TABLE: Incomplete H2Z_A: Incomplete H2Z_AD: Incomplete H2Z_AK: Incomplete H2Z_D: Incomplete H2Z_K: Incomplete H2Z_DK: Incomplete H2Z_ALL: Incomplete Z2H_A: Incomplete Z2H_AD: Incomplete Z2H_AK: Incomplete Z2H_D: Incomplete Z2H_K: Incomplete Z2H_DK: Incomplete Z2H_ALL: Incomplete KANA2HEP: Incomplete HEP2KANA: Incomplete JULIUS_LONG_VOWEL: Incomplete SMALL_KANA2NORMAL_KANA: Incomplete ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/jaconv.py0000644000076500000240000010773715142065620015367 0ustar00yukinostaff# -*- coding: utf-8 -*- from __future__ import unicode_literals import unicodedata import warnings from .compat import map from .conv_table import ( H2HK_TABLE, H2K_TABLE, H2Z_A, H2Z_AD, H2Z_AK, H2Z_ALL, H2Z_D, H2Z_DK, H2Z_K, HEP2KANA, JULIUS_LONG_VOWEL, K2H_TABLE, KANA2HEP, SMALL_KANA2NORMAL_KANA, Z2H_A, Z2H_AD, Z2H_AK, Z2H_ALL, Z2H_D, Z2H_DK, Z2H_K, ) consonants = frozenset('sdfghjklqwrtypzxcvbnm') def _exclude_ignorechar(ignore, conv_map): for character in map(ord, ignore): del conv_map[character] return conv_map def _convert(text, conv_map): return text.translate(conv_map) def _translate(text, ignore, conv_map): if ignore: _conv_map = _exclude_ignorechar(ignore, conv_map.copy()) return _convert(text, _conv_map) return _convert(text, conv_map) def hira2kata(text, ignore=''): """Convert Hiragana to Full-width (Zenkaku) Katakana. Parameters ---------- text : str Hiragana string. ignore : str, optional Characters to be ignored in converting. Return ------ str Katakana string. Examples -------- >>> print(jaconv.hira2kata('ともえまみ')) トモエマミ >>> print(jaconv.hira2kata('まどまぎ', ignore='ど')) マどマギ """ return _translate(text, ignore, H2K_TABLE) def hira2hkata(text, ignore=''): """Convert Hiragana to Half-width (Hankaku) Katakana Parameters ---------- text : str Hiragana string. ignore : str, optional Characters to be ignored in converting. Return ------ str Half-width Katakana string. Examples -------- >>> print(jaconv.hira2hkata('ともえまみ')) トモエマミ >>> print(jaconv.hira2hkata('ともえまみ', ignore='み')) トモエマみ """ return _translate(text, ignore, H2HK_TABLE) def kata2hira(text, ignore=''): """Convert Full-width Katakana to Hiragana Parameters ---------- text : str Full-width Katakana string. ignore : str, optional Characters to be ignored in converting. Return ------ str Hiragana string. Examples -------- >>> print(jaconv.kata2hira('巴マミ')) 巴まみ >>> print(jaconv.kata2hira('マミサン', ignore='ン')) まみさン """ return _translate(text, ignore, K2H_TABLE) def enlargesmallkana(text, ignore=''): warn_msg = ( '`enlargesmallkana` is deprecated and will be removed in future versions.' ' Use `enlarge_smallkana` instead.' ) warnings.warn(warn_msg, UserWarning) return enlarge_smallkana(text, ignore) def enlarge_smallkana(text, ignore=''): """Convert small Hiragana or Katakana to normal size Parameters ---------- text : str Full-width Hiragana or Katakana string. ignore : str, optional Characters to be ignored in converting. Return ------ str Hiragana or Katakana string, enlarged small Kana Examples -------- >>> print(jaconv.enlargesmallkana('さくらきょうこ')) さくらきようこ >>> print(jaconv.enlargesmallkana('キュゥべえ')) キユウべえ """ return _translate(text, ignore, SMALL_KANA2NORMAL_KANA) def h2z(text, ignore='', kana=True, ascii=False, digit=False): """Convert Half-width (Hankaku) Katakana to Full-width (Zenkaku) Katakana Parameters ---------- text : str Half-width Katakana string. ignore : str, optional Characters to be ignored in converting. kana : bool, optional Either converting Kana or not. ascii : bool, optional Either converting ascii or not. digit : bool, optional Either converting digit or not. Return ------ str Full-width Katakana string. Examples -------- >>> print(jaconv.h2z('ティロフィナーレ')) ティロフィナーレ >>> print(jaconv.h2z('ティロフィナーレ', ignore='ィ')) ティロフィナーレ >>> print(jaconv.h2z('abcd', ascii=True)) ABCD >>> print(jaconv.h2z('1234', digit=True)) 1234 """ def _conv_dakuten(text): """Convert Hankaku Dakuten Kana to Zenkaku Dakuten Kana""" text = text.replace('ガ', 'ガ').replace('ギ', 'ギ') text = text.replace('グ', 'グ').replace('ゲ', 'ゲ') text = text.replace('ゴ', 'ゴ').replace('ザ', 'ザ') text = text.replace('ジ', 'ジ').replace('ズ', 'ズ') text = text.replace('ゼ', 'ゼ').replace('ゾ', 'ゾ') text = text.replace('ダ', 'ダ').replace('ヂ', 'ヂ') text = text.replace('ヅ', 'ヅ').replace('デ', 'デ') text = text.replace('ド', 'ド').replace('バ', 'バ') text = text.replace('ビ', 'ビ').replace('ブ', 'ブ') text = text.replace('ベ', 'ベ').replace('ボ', 'ボ') text = text.replace('パ', 'パ').replace('ピ', 'ピ') text = text.replace('プ', 'プ').replace('ペ', 'ペ') return text.replace('ポ', 'ポ').replace('ヴ', 'ヴ') if ascii: if digit: if kana: h2z_map = H2Z_ALL else: h2z_map = H2Z_AD elif kana: h2z_map = H2Z_AK else: h2z_map = H2Z_A elif digit: if kana: h2z_map = H2Z_DK else: h2z_map = H2Z_D else: if kana: h2z_map = H2Z_K else: h2z_map = {} # empty if kana: text = _conv_dakuten(text) if ignore: h2z_map = _exclude_ignorechar(ignore, h2z_map.copy()) return _convert(text, h2z_map) def hankaku2zenkaku(text, ignore='', kana=True, ascii=False, digit=False): """An alias of h2z""" return h2z(text, ignore, kana, ascii, digit) def han2zen(text, ignore='', kana=True, ascii=False, digit=False): """An alias of h2z""" return h2z(text, ignore, kana, ascii, digit) def z2h(text, ignore='', kana=True, ascii=False, digit=False): """Convert Full-width (Zenkaku) Katakana to Half-width (Hankaku) Katakana Parameters ---------- text : str Full-width Katakana string. ignore : str, optional Characters to be ignored in converting. kana : bool, optional Either converting Kana or not. ascii : bool, optional Either converting ascii or not. digit : bool, optional Either converting digit or not. Return ------ str Half-width Katakana string. Examples -------- >>> print(jaconv.z2h('ティロフィナーレ')) ティロフィナーレ >>> print(jaconv.z2h('ティロフィナーレ', ignore='ィ')) ティロフィナーレ >>> print(jaconv.z2h('ABCD', ascii=True)) abcd >>> print(jaconv.z2h('1234', digit=True)) 1234 """ if ascii: if digit: if kana: z2h_map = Z2H_ALL else: z2h_map = Z2H_AD elif kana: z2h_map = Z2H_AK else: z2h_map = Z2H_A elif digit: if kana: z2h_map = Z2H_DK else: z2h_map = Z2H_D else: if kana: z2h_map = Z2H_K else: z2h_map = {} # empty if ignore: z2h_map = _exclude_ignorechar(ignore, z2h_map.copy()) return _convert(text, z2h_map) def zenkaku2hankaku(text, ignore='', kana=True, ascii=False, digit=False): """An alias of z2h""" return z2h(text, ignore, kana, ascii, digit) def zen2han(text, ignore='', kana=True, ascii=False, digit=False): """An alias of z2h""" return z2h(text, ignore, kana, ascii, digit) def normalize(text, mode='NFKC'): """Convert Half-width (Hankaku) Katakana to Full-width (Zenkaku) Katakana, Full-width (Zenkaku) ASCII and DIGIT to Half-width (Hankaku) ASCII and DIGIT. Additionally, Full-width wave dash (〜) etc. are normalized Parameters ---------- text : str Source string. mode : Literal['NFC', 'NFD', 'NFKC', 'NFKD'], optional Unicode normalization mode. Return ------ str Normalized string. Examples -------- >>> print(jaconv.normalize('ティロ・フィナ〜レ', 'NFKC')) ティロ・フィナーレ """ text = text.replace('〜', 'ー').replace('~', 'ー') text = text.replace('’', "'").replace('”', '"').replace('“', '"') text = text.replace('―', '-').replace('‐', '-').replace('˗', '-').replace('֊', '-') text = text.replace('‐', '-').replace('‑', '-').replace('‒', '-').replace('–', '-') text = text.replace('⁃', '-').replace('⁻', '-').replace('₋', '-').replace('−', '-') text = ( text.replace('﹣', 'ー') .replace('-', 'ー') .replace('—', 'ー') .replace('―', 'ー') ) text = text.replace('━', 'ー').replace('─', 'ー') return unicodedata.normalize(mode, text) # pyright: ignore[reportArgumentType] def kana2alphabet(text): """Convert Hiragana to Roman-input-style alphabets Parameters ---------- text : str Hiragana string. Return ------ str Roman-input-style alphabets string. Examples -------- >>> print(jaconv.kana2alphabet('まみさん')) mamisan """ text = text.replace('きゃ', 'kya').replace('きゅ', 'kyu').replace('きょ', 'kyo') text = text.replace('ぎゃ', 'gya').replace('ぎゅ', 'gyu').replace('ぎょ', 'gyo') text = text.replace('しゃ', 'sha').replace('しゅ', 'shu').replace('しょ', 'sho') text = text.replace('じゃ', 'ja').replace('じゅ', 'ju').replace('じょ', 'jo') text = text.replace('ちゃ', 'cha').replace('ちゅ', 'chu').replace('ちょ', 'cho') text = text.replace('にゃ', 'nya').replace('にゅ', 'nyu').replace('にょ', 'nyo') text = text.replace('ひゃ', 'hya').replace('ひゅ', 'hyu').replace('ひょ', 'hyo') text = text.replace('ふぁ', 'fa').replace('ふぃ', 'fi').replace('ふぇ', 'fe') text = text.replace('ふぉ', 'fo') text = text.replace('みゃ', 'mya').replace('みゅ', 'myu').replace('みょ', 'myo') text = text.replace('りゃ', 'rya').replace('りゅ', 'ryu').replace('りょ', 'ryo') text = text.replace('びゃ', 'bya').replace('びゅ', 'byu').replace('びょ', 'byo') text = text.replace('ぴゃ', 'pya').replace('ぴゅ', 'pyu').replace('ぴょ', 'pyo') text = text.replace('が', 'ga').replace('ぎ', 'gi').replace('ぐ', 'gu') text = text.replace('げ', 'ge').replace('ご', 'go').replace('ざ', 'za') text = text.replace('じ', 'ji').replace('ず', 'zu').replace('ぜ', 'ze') text = text.replace('ぞ', 'zo').replace('だ', 'da').replace('ぢ', 'ji') text = text.replace('づ', 'zu').replace('で', 'de').replace('ど', 'do') text = text.replace('ば', 'ba').replace('び', 'bi').replace('ぶ', 'bu') text = text.replace('べ', 'be').replace('ぼ', 'bo').replace('ぱ', 'pa') text = text.replace('ぴ', 'pi').replace('ぷ', 'pu').replace('ぺ', 'pe') text = text.replace('ぽ', 'po') text = text.replace('か', 'ka').replace('き', 'ki').replace('く', 'ku') text = text.replace('け', 'ke').replace('こ', 'ko').replace('さ', 'sa') text = text.replace('し', 'shi').replace('す', 'su').replace('せ', 'se') text = text.replace('そ', 'so').replace('た', 'ta').replace('ち', 'chi') text = text.replace('つ', 'tsu').replace('て', 'te').replace('と', 'to') text = text.replace('な', 'na').replace('に', 'ni').replace('ぬ', 'nu') text = text.replace('ね', 'ne').replace('の', 'no').replace('は', 'ha') text = text.replace('ひ', 'hi').replace('ふ', 'fu').replace('へ', 'he') text = text.replace('ほ', 'ho').replace('ま', 'ma').replace('み', 'mi') text = text.replace('む', 'mu').replace('め', 'me').replace('も', 'mo') text = text.replace('ら', 'ra').replace('り', 'ri').replace('る', 'ru') text = text.replace('れ', 're').replace('ろ', 'ro') text = text.replace('や', 'ya').replace('ゆ', 'yu').replace('よ', 'yo') text = text.replace('わ', 'wa').replace('ゐ', 'wi').replace('を', 'wo') text = text.replace('ゑ', 'we') text = text.replace('ゔぁ', 'va').replace('ゔぃ', 'vi').replace('ゔぅ', 'vuu') text = text.replace('ゔぇ', 've').replace('ゔぉ', 'vo') text = text.replace('ゃ', 'ya').replace('ゅ', 'yu').replace('ょ', 'yo') text = text.replace('ぁ', 'a').replace('ぃ', 'i').replace('ぅ', 'u') text = text.replace('ぇ', 'e').replace('ぉ', 'o') text = text.replace('ゎ', 'wa') text = text.replace('ゔ', 'vu') text = text.replace('ヵ', 'ka') # Strictly, it's kanji, not kana. text = _convert(text, KANA2HEP) while 'っ' in text: chars = list(text) tsu_pos = chars.index('っ') if len(chars) <= tsu_pos + 1: return ''.join(chars[:-1]) + 'xtsu' if tsu_pos == 0: chars[tsu_pos] = 'xtsu' elif chars[tsu_pos + 1] == 'っ': chars[tsu_pos] = 'xtsu' else: chars[tsu_pos] = chars[tsu_pos + 1] text = ''.join(chars) return text def kata2alphabet(text): """Convert Katakana to Roman-input-style alphabets Parameters ---------- text : str Katakana string. Return ------ str Roman-input-style alphabets string. Examples -------- >>> print(jaconv.kata2alphabet('マミサン')) mamisan """ return kana2alphabet(kata2hira(text)) def alphabet2kana(text): """Convert alphabets to Hiragana Parameters ---------- text : str Roman-input-style alphabets string. Return ------ str Hiragana string. Examples -------- >>> print(jaconv.alphabet2kana('mamisan')) まみさん """ text = text.lower() # ensure lower case. # replace final h with う, e.g., Itoh -> いとう if text.endswith('h') and len(text) >= 2: text = text[:-1] + 'う' text = text.replace('tch', 'っch') text = text.replace('bb', 'っb').replace('cc', 'っc').replace('dd', 'っd') text = text.replace('ff', 'っf').replace('gg', 'っg').replace('hh', 'っh') text = text.replace('jj', 'っj').replace('kk', 'っk').replace('ll', 'っl') text = text.replace('mm', 'っm').replace('pp', 'っp').replace('qq', 'っq') text = text.replace('rr', 'っr').replace('ss', 'っs').replace('tt', 'っt') text = text.replace('vv', 'っv').replace('ww', 'っw').replace('xx', 'っx') text = text.replace('yy', 'っy').replace('zz', 'っz') text = text.replace('kya', 'きゃ').replace('kyi', 'きぃ').replace('kyu', 'きゅ') text = text.replace('kye', 'きぇ').replace('kyo', 'きょ') text = text.replace('gya', 'ぎゃ').replace('gyi', 'ぎぃ').replace('gyu', 'ぎゅ') text = text.replace('gye', 'ぎぇ').replace('gyo', 'ぎょ') text = text.replace('sha', 'しゃ').replace('shi', 'し').replace('shu', 'しゅ') text = text.replace('she', 'しぇ').replace('sho', 'しょ') text = text.replace('sya', 'しゃ').replace('syi', 'しぃ').replace('syu', 'しゅ') text = text.replace('sye', 'しぇ').replace('syo', 'しょ') text = text.replace('zya', 'じゃ').replace('zyu', 'じゅ').replace('zyo', 'じょ') text = text.replace('zyi', 'じぃ').replace('zye', 'じぇ') text = text.replace('jya', 'じゃ').replace('jyi', 'じぃ').replace('jyu', 'じゅ') text = text.replace('jye', 'じぇ').replace('jyo', 'じょ') text = text.replace('dya', 'ぢゃ').replace('dyi', 'ぢぃ').replace('dyu', 'ぢゅ') text = text.replace('dye', 'ぢぇ').replace('dyo', 'ぢょ') text = text.replace('cha', 'ちゃ').replace('chu', 'ちゅ').replace('che', 'ちぇ') text = text.replace('cho', 'ちょ') text = text.replace('cya', 'ちゃ').replace('cyi', 'ちぃ').replace('cyu', 'ちゅ') text = text.replace('cye', 'ちぇ').replace('cyo', 'ちょ') text = text.replace('tya', 'ちゃ').replace('tyi', 'ちぃ').replace('tyu', 'ちゅ') text = text.replace('tye', 'ちぇ').replace('tyo', 'ちょ') text = text.replace('tsa', 'つぁ').replace('tsi', 'つぃ').replace('tse', 'つぇ') text = text.replace('tso', 'つぉ') text = text.replace('thi', 'てぃ').replace("t'i", 'てぃ') text = text.replace('tha', 'てゃ').replace('thu', 'てゅ').replace("t'yu", 'てゅ') text = text.replace('the', 'てぇ').replace('tho', 'てょ') text = text.replace('dha', 'でゃ').replace('dhi', 'でぃ').replace("d'i", 'でぃ') text = text.replace('dhu', 'でゅ').replace('dhe', 'でぇ').replace('dho', 'でょ') text = text.replace("d'yu", 'でゅ') text = text.replace('twa', 'とぁ').replace('twi', 'とぃ').replace('twu', 'とぅ') text = text.replace('twe', 'とぇ').replace('two', 'とぉ').replace("t'u", 'とぅ') text = text.replace('dwa', 'どぁ').replace('dwi', 'どぃ').replace('dwu', 'どぅ') text = text.replace('dwe', 'どぇ').replace('dwo', 'どぉ').replace("d'u", 'どぅ') text = text.replace('nya', 'にゃ').replace('nyi', 'にぃ').replace('nyu', 'にゅ') text = text.replace('nye', 'にぇ').replace('nyo', 'にょ') text = text.replace('hya', 'ひゃ').replace('hyi', 'ひぃ').replace('hyu', 'ひゅ') text = text.replace('hye', 'ひぇ').replace('hyo', 'ひょ') text = text.replace('hwa', 'ふぁ').replace('hwi', 'ふぃ').replace('hwe', 'ふぇ') text = text.replace('hwo', 'ふぉ').replace('hwyu', 'ふゅ') text = text.replace('fya', 'ふゃ').replace('fyu', 'ふゅ').replace('fyo', 'ふょ') text = text.replace('pha', 'ふぁ').replace('phi', 'ふぃ').replace('phu', 'ふぅ') text = text.replace('phe', 'ふぇ').replace('pho', 'ふぉ') text = text.replace('mya', 'みゃ').replace('myi', 'みぃ').replace('myu', 'みゅ') text = text.replace('mye', 'みぇ').replace('myo', 'みょ') text = text.replace('rya', 'りゃ').replace('ryi', 'りぃ').replace('ryu', 'りゅ') text = text.replace('rye', 'りぇ').replace('ryo', 'りょ') text = text.replace('bya', 'びゃ').replace('byi', 'びぃ').replace('byu', 'びゅ') text = text.replace('bye', 'びぇ').replace('byo', 'びょ') text = text.replace('pya', 'ぴゃ').replace('pyi', 'ぴぃ').replace('pyu', 'ぴゅ') text = text.replace('pye', 'ぴぇ').replace('pyo', 'ぴょ') text = text.replace('vyi', 'ゔぃ').replace('vyu', 'ゔゅ').replace('vye', 'ゔぇ') text = text.replace('vyo', 'ゔょ') text = text.replace('wye', 'ゑ') text = text.replace('kwa', 'くぁ').replace('kwi', 'くぃ').replace('kwu', 'くぅ') text = text.replace('kwe', 'くぇ').replace('kwo', 'くぉ') text = text.replace('gwa', 'ぐぁ').replace('gwi', 'ぐぃ').replace('gwu', 'ぐぅ') text = text.replace('gwe', 'ぐぇ').replace('gwo', 'ぐぉ') text = text.replace('swa', 'すぁ').replace('swi', 'すぃ').replace('swu', 'すぅ') text = text.replace('swe', 'すぇ').replace('swo', 'すぉ') text = text.replace('zwa', 'ずぁ').replace('zwi', 'ずぃ').replace('zwu', 'ずぅ') text = text.replace('zwe', 'ずぇ').replace('zwo', 'ずぉ') text = text.replace('vya', 'ゔゃ') text = text.replace('xtu', 'っ').replace('xtsu', 'っ') text = text.replace('ltu', 'っ').replace('ltsu', 'っ') text = text.replace('xya', 'ゃ').replace('lya', 'ゃ') text = text.replace('xyu', 'ゅ').replace('lyu', 'ゅ') text = text.replace('xyo', 'ょ').replace('lyo', 'ょ') text = text.replace('wha', 'うぁ').replace('whi', 'うぃ').replace('whu', 'う') text = text.replace('whe', 'うぇ').replace('who', 'うぉ') text = text.replace('xwa', 'ゎ').replace('lwa', 'ゎ') text = text.replace('lyi', 'ぃ').replace('xyi', 'ぃ') text = text.replace('lye', 'ぇ').replace('xye', 'ぇ') text = text.replace('xka', 'ヵ').replace('lka', 'ヵ') text = text.replace('xke', 'ヶ').replace('lke', 'ヶ') text = text.replace('tsu', 'つ') text = text.replace('nn', 'ん') text = text.replace('ja', 'じゃ').replace('ji', 'じ').replace('ju', 'じゅ') text = text.replace('je', 'じぇ').replace('jo', 'じょ') text = text.replace('ga', 'が').replace('gi', 'ぎ').replace('gu', 'ぐ') text = text.replace('ge', 'げ').replace('go', 'ご') text = text.replace('za', 'ざ').replace('zi', 'じ').replace('zu', 'ず') text = text.replace('ze', 'ぜ').replace('zo', 'ぞ') text = text.replace('da', 'だ').replace('di', 'ぢ').replace('du', 'づ') text = text.replace('de', 'で').replace('do', 'ど') text = text.replace('va', 'ゔぁ').replace('vi', 'ゔぃ').replace('vu', 'ゔ') text = text.replace('ve', 'ゔぇ').replace('vo', 'ゔぉ') text = text.replace('ba', 'ば').replace('bi', 'び').replace('bu', 'ぶ') text = text.replace('be', 'べ').replace('bo', 'ぼ') text = text.replace('pa', 'ぱ').replace('pi', 'ぴ').replace('pu', 'ぷ') text = text.replace('pe', 'ぺ').replace('po', 'ぽ') text = text.replace('ka', 'か').replace('ki', 'き').replace('ku', 'く') text = text.replace('ke', 'け').replace('ko', 'こ') text = text.replace('qa', 'くぁ').replace('qi', 'くぃ').replace('qu', 'く') text = text.replace('qe', 'くぇ').replace('qo', 'くぉ') text = text.replace('ca', 'か').replace('cu', 'く').replace('co', 'こ') text = text.replace('ci', 'し').replace('ce', 'せ') text = text.replace('sa', 'さ').replace('si', 'し').replace('su', 'す') text = text.replace('se', 'せ').replace('so', 'そ') text = text.replace('ta', 'た').replace('chi', 'ち').replace('ti', 'ち') text = text.replace('tu', 'つ').replace('te', 'て').replace('to', 'と') text = text.replace('na', 'な').replace('ni', 'に').replace('nu', 'ぬ') text = text.replace('ne', 'ね').replace('no', 'の') text = text.replace('ha', 'は').replace('hi', 'ひ').replace('fu', 'ふ') text = text.replace('hu', 'ふ').replace('he', 'へ').replace('ho', 'ほ') text = text.replace('fa', 'ふぁ').replace('fi', 'ふぃ').replace('fe', 'ふぇ') text = text.replace('fo', 'ふぉ') text = text.replace('ma', 'ま').replace('mi', 'み').replace('mu', 'む') text = text.replace('me', 'め').replace('mo', 'も') text = text.replace('ra', 'ら').replace('ri', 'り').replace('ru', 'る') text = text.replace('re', 'れ').replace('ro', 'ろ') text = text.replace('la', 'ら').replace('li', 'り').replace('lu', 'る') text = text.replace('le', 'れ').replace('lo', 'ろ') text = text.replace('ya', 'や').replace('yu', 'ゆ').replace('yo', 'よ') text = text.replace('ye', 'いぇ') text = text.replace('wa', 'わ').replace('wi', 'うぃ').replace('wyi', 'ゐ') text = text.replace('wu', 'う').replace('wo', 'を') text = text.replace('oh', 'おお') text = text.replace('xa', 'ぁ').replace('xi', 'ぃ').replace('xu', 'ぅ') text = text.replace('xe', 'ぇ').replace('xo', 'ぉ') text = text.replace("n'", 'ん').replace('xn', 'ん').replace('m', 'ん') text = _convert(text, HEP2KANA) ret = [] for i, char in enumerate(text): if char in consonants: char = 'っ' ret.append(char) return ''.join(ret) def alphabet2kata(text): """Convert alphabets to Katakana Parameters ---------- text : str Roman-input-style alphabets string. Return ------ str Katakana string. Examples -------- >>> print(jaconv.alphabet2kata('mamisan')) マミサン """ return hira2kata(alphabet2kana(text)) def hiragana2julius(text): """Convert Hiragana to Julius's phoneme format. Parameters ---------- text : str Hiragana string. Return ------ str Alphabet string. Examples -------- >>> print(jaconv.hiragana2julius('てんきすごくいいいいいい')) t e N k i s u g o k u i: """ # 3文字以上からなる変換規則 text = text.replace('う゛ぁ', ' b a') text = text.replace('う゛ぃ', ' b i') text = text.replace('う゛ぇ', ' b e') text = text.replace('う゛ぉ', ' b o') text = text.replace('う゛ゅ', ' by u') # 2文字からなる変換規則 text = text.replace('ぅ゛', ' b u') text = text.replace('あぁ', ' a a') text = text.replace('いぃ', ' i i') text = text.replace('いぇ', ' i e') text = text.replace('いゃ', ' y a') text = text.replace('うぅ', ' u:') text = text.replace('えぇ', ' e e') text = text.replace('おぉ', ' o:') text = text.replace('かぁ', ' k a:') text = text.replace('きぃ', ' k i:') text = text.replace('くぅ', ' k u:') text = text.replace('くゃ', ' ky a') text = text.replace('くゅ', ' ky u') text = text.replace('くょ', ' ky o') text = text.replace('けぇ', ' k e:') text = text.replace('こぉ', ' k o:') text = text.replace('がぁ', ' g a:') text = text.replace('ぎぃ', ' g i:') text = text.replace('ぐぅ', ' g u:') text = text.replace('ぐゃ', ' gy a') text = text.replace('ぐゅ', ' gy u') text = text.replace('ぐょ', ' gy o') text = text.replace('げぇ', ' g e:') text = text.replace('ごぉ', ' g o:') text = text.replace('さぁ', ' s a:') text = text.replace('しぃ', ' sh i:') text = text.replace('すぅ', ' s u:') text = text.replace('すゃ', ' sh a') text = text.replace('すゅ', ' sh u') text = text.replace('すょ', ' sh o') text = text.replace('せぇ', ' s e:') text = text.replace('そぉ', ' s o:') text = text.replace('ざぁ', ' z a:') text = text.replace('じぃ', ' j i:') text = text.replace('ずぅ', ' z u:') text = text.replace('ずゃ', ' zy a') text = text.replace('ずゅ', ' zy u') text = text.replace('ずょ', ' zy o') text = text.replace('ぜぇ', ' z e:') text = text.replace('ぞぉ', ' z o:') text = text.replace('たぁ', ' t a:') text = text.replace('ちぃ', ' ch i:') text = text.replace('つぁ', ' ts a') text = text.replace('つぃ', ' ts i') text = text.replace('つぅ', ' ts u:') text = text.replace('つゃ', ' ch a') text = text.replace('つゅ', ' ch u') text = text.replace('つょ', ' ch o') text = text.replace('つぇ', ' ts e') text = text.replace('つぉ', ' ts o') text = text.replace('てぇ', ' t e:') text = text.replace('とぉ', ' t o:') text = text.replace('だぁ', ' d a:') text = text.replace('ぢぃ', ' j i:') text = text.replace('づぅ', ' d u:') text = text.replace('づゃ', ' zy a') text = text.replace('づゅ', ' zy u') text = text.replace('づょ', ' zy o') text = text.replace('でぇ', ' d e:') text = text.replace('どぉ', ' d o:') text = text.replace('なぁ', ' n a:') text = text.replace('にぃ', ' n i:') text = text.replace('ぬぅ', ' n u:') text = text.replace('ぬゃ', ' ny a') text = text.replace('ぬゅ', ' ny u') text = text.replace('ぬょ', ' ny o') text = text.replace('ねぇ', ' n e:') text = text.replace('のぉ', ' n o:') text = text.replace('はぁ', ' h a:') text = text.replace('ひぃ', ' h i:') text = text.replace('ふぅ', ' f u:') text = text.replace('ふゃ', ' hy a') text = text.replace('ふゅ', ' hy u') text = text.replace('ふょ', ' hy o') text = text.replace('へぇ', ' h e:') text = text.replace('ほぉ', ' h o:') text = text.replace('ばぁ', ' b a:') text = text.replace('びぃ', ' b i:') text = text.replace('ぶぅ', ' b u:') text = text.replace('ふゃ', ' hy a') text = text.replace('ぶゅ', ' by u') text = text.replace('ふょ', ' hy o') text = text.replace('べぇ', ' b e:') text = text.replace('ぼぉ', ' b o:') text = text.replace('ぱぁ', ' p a:') text = text.replace('ぴぃ', ' p i:') text = text.replace('ぷぅ', ' p u:') text = text.replace('ぷゃ', ' py a') text = text.replace('ぷゅ', ' py u') text = text.replace('ぷょ', ' py o') text = text.replace('ぺぇ', ' p e:') text = text.replace('ぽぉ', ' p o:') text = text.replace('まぁ', ' m a:') text = text.replace('みぃ', ' m i:') text = text.replace('むぅ', ' m u:') text = text.replace('むゃ', ' my a') text = text.replace('むゅ', ' my u') text = text.replace('むょ', ' my o') text = text.replace('めぇ', ' m e:') text = text.replace('もぉ', ' m o:') text = text.replace('やぁ', ' y a:') text = text.replace('ゆぅ', ' y u:') text = text.replace('ゆゃ', ' y a:') text = text.replace('ゆゅ', ' y u:') text = text.replace('ゆょ', ' y o:') text = text.replace('よぉ', ' y o:') text = text.replace('らぁ', ' r a:') text = text.replace('りぃ', ' r i:') text = text.replace('るぅ', ' r u:') text = text.replace('るゃ', ' ry a') text = text.replace('るゅ', ' ry u') text = text.replace('るょ', ' ry o') text = text.replace('れぇ', ' r e:') text = text.replace('ろぉ', ' r o:') text = text.replace('わぁ', ' w a:') text = text.replace('をぉ', ' o:') text = text.replace('う゛', ' b u') text = text.replace('でぃ', ' d i') text = text.replace('でぇ', ' d e:') text = text.replace('でゃ', ' dy a') text = text.replace('でゅ', ' dy u') text = text.replace('でょ', ' dy o') text = text.replace('てぃ', ' t i') text = text.replace('てぇ', ' t e:') text = text.replace('てゃ', ' ty a') text = text.replace('てゅ', ' ty u') text = text.replace('てょ', ' ty o') text = text.replace('すぃ', ' s i') text = text.replace('ずぁ', ' z u a') text = text.replace('ずぃ', ' z i') text = text.replace('ずぅ', ' z u') text = text.replace('ずゃ', ' zy a') text = text.replace('ずゅ', ' zy u') text = text.replace('ずょ', ' zy o') text = text.replace('ずぇ', ' z e') text = text.replace('ずぉ', ' z o') text = text.replace('きゃ', ' ky a') text = text.replace('きゅ', ' ky u') text = text.replace('きょ', ' ky o') text = text.replace('しゃ', ' sh a') text = text.replace('しゅ', ' sh u') text = text.replace('しぇ', ' sh e') text = text.replace('しょ', ' sh o') text = text.replace('ちゃ', ' ch a') text = text.replace('ちゅ', ' ch u') text = text.replace('ちぇ', ' ch e') text = text.replace('ちょ', ' ch o') text = text.replace('とぅ', ' t u') text = text.replace('とゃ', ' ty a') text = text.replace('とゅ', ' ty u') text = text.replace('とょ', ' ty o') text = text.replace('どぁ', ' d o a') text = text.replace('どぅ', ' d u') text = text.replace('どゃ', ' dy a') text = text.replace('どゅ', ' dy u') text = text.replace('どょ', ' dy o') text = text.replace('どぉ', ' d o:') text = text.replace('にゃ', ' ny a') text = text.replace('にゅ', ' ny u') text = text.replace('にょ', ' ny o') text = text.replace('ひゃ', ' hy a') text = text.replace('ひゅ', ' hy u') text = text.replace('ひょ', ' hy o') text = text.replace('みゃ', ' my a') text = text.replace('みゅ', ' my u') text = text.replace('みょ', ' my o') text = text.replace('りゃ', ' ry a') text = text.replace('りゅ', ' ry u') text = text.replace('りょ', ' ry o') text = text.replace('ぎゃ', ' gy a') text = text.replace('ぎゅ', ' gy u') text = text.replace('ぎょ', ' gy o') text = text.replace('ぢぇ', ' j e') text = text.replace('ぢゃ', ' j a') text = text.replace('ぢゅ', ' j u') text = text.replace('ぢょ', ' j o') text = text.replace('じぇ', ' j e') text = text.replace('じゃ', ' j a') text = text.replace('じゅ', ' j u') text = text.replace('じょ', ' j o') text = text.replace('びゃ', ' by a') text = text.replace('びゅ', ' by u') text = text.replace('びょ', ' by o') text = text.replace('ぴゃ', ' py a') text = text.replace('ぴゅ', ' py u') text = text.replace('ぴょ', ' py o') text = text.replace('うぁ', ' u a') text = text.replace('うぃ', ' w i') text = text.replace('うぇ', ' w e') text = text.replace('うぉ', ' w o') text = text.replace('ふぁ', ' f a') text = text.replace('ふぃ', ' f i') text = text.replace('ふぅ', ' f u') text = text.replace('ふゃ', ' hy a') text = text.replace('ふゅ', ' hy u') text = text.replace('ふょ', ' hy o') text = text.replace('ふぇ', ' f e') text = text.replace('ふぉ', ' f o') # 1音からなる変換規則 text = text.replace('あ', ' a') text = text.replace('い', ' i') text = text.replace('う', ' u') text = text.replace('え', ' e') text = text.replace('お', ' o') text = text.replace('か', ' k a') text = text.replace('き', ' k i') text = text.replace('く', ' k u') text = text.replace('け', ' k e') text = text.replace('こ', ' k o') text = text.replace('さ', ' s a') text = text.replace('し', ' sh i') text = text.replace('す', ' s u') text = text.replace('せ', ' s e') text = text.replace('そ', ' s o') text = text.replace('た', ' t a') text = text.replace('ち', ' ch i') text = text.replace('つ', ' ts u') text = text.replace('て', ' t e') text = text.replace('と', ' t o') text = text.replace('な', ' n a') text = text.replace('に', ' n i') text = text.replace('ぬ', ' n u') text = text.replace('ね', ' n e') text = text.replace('の', ' n o') text = text.replace('は', ' h a') text = text.replace('ひ', ' h i') text = text.replace('ふ', ' f u') text = text.replace('へ', ' h e') text = text.replace('ほ', ' h o') text = text.replace('ま', ' m a') text = text.replace('み', ' m i') text = text.replace('む', ' m u') text = text.replace('め', ' m e') text = text.replace('も', ' m o') text = text.replace('ら', ' r a') text = text.replace('り', ' r i') text = text.replace('る', ' r u') text = text.replace('れ', ' r e') text = text.replace('ろ', ' r o') text = text.replace('が', ' g a') text = text.replace('ぎ', ' g i') text = text.replace('ぐ', ' g u') text = text.replace('げ', ' g e') text = text.replace('ご', ' g o') text = text.replace('ざ', ' z a') text = text.replace('じ', ' j i') text = text.replace('ず', ' z u') text = text.replace('ぜ', ' z e') text = text.replace('ぞ', ' z o') text = text.replace('だ', ' d a') text = text.replace('ぢ', ' j i') text = text.replace('づ', ' z u') text = text.replace('で', ' d e') text = text.replace('ど', ' d o') text = text.replace('ば', ' b a') text = text.replace('び', ' b i') text = text.replace('ぶ', ' b u') text = text.replace('べ', ' b e') text = text.replace('ぼ', ' b o') text = text.replace('ぱ', ' p a') text = text.replace('ぴ', ' p i') text = text.replace('ぷ', ' p u') text = text.replace('ぺ', ' p e') text = text.replace('ぽ', ' p o') text = text.replace('や', ' y a') text = text.replace('ゆ', ' y u') text = text.replace('よ', ' y o') text = text.replace('わ', ' w a') text = text.replace('ゐ', ' i') text = text.replace('ゑ', ' e') text = text.replace('ん', ' N') text = text.replace('っ', ' q') # ここまでに処理されてない ぁぃぅぇぉ はそのまま大文字扱い text = text.replace('ぁ', ' a') text = text.replace('ぃ', ' i') text = text.replace('ぅ', ' u') text = text.replace('ぇ', ' e') text = text.replace('ぉ', ' o') text = text.replace('ゎ', ' w a') # 長音の処理 for pattern, replace_str in JULIUS_LONG_VOWEL: text = pattern.sub(replace_str, text) text = text.replace('o u', 'o:') # おう -> おーの音便 text = text.replace('ー', ':') text = text.replace('〜', ':') text = text.replace('−', ':') text = text.replace('-', ':') # その他特別な処理 text = text.replace('を', ' o') text = text.strip() text = text.replace(':+', ':') return text ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/jaconv/jaconv.pyi0000644000076500000240000000525315142065620015526 0ustar00yukinostaffimport typing from _typeshed import Incomplete from .compat import map as map from .conv_table import H2HK_TABLE as H2HK_TABLE from .conv_table import H2K_TABLE as H2K_TABLE from .conv_table import H2Z_A as H2Z_A from .conv_table import H2Z_AD as H2Z_AD from .conv_table import H2Z_AK as H2Z_AK from .conv_table import H2Z_ALL as H2Z_ALL from .conv_table import H2Z_D as H2Z_D from .conv_table import H2Z_DK as H2Z_DK from .conv_table import H2Z_K as H2Z_K from .conv_table import HEP2KANA as HEP2KANA from .conv_table import JULIUS_LONG_VOWEL as JULIUS_LONG_VOWEL from .conv_table import K2H_TABLE as K2H_TABLE from .conv_table import KANA2HEP as KANA2HEP from .conv_table import SMALL_KANA2NORMAL_KANA as SMALL_KANA2NORMAL_KANA from .conv_table import Z2H_A as Z2H_A from .conv_table import Z2H_AD as Z2H_AD from .conv_table import Z2H_AK as Z2H_AK from .conv_table import Z2H_ALL as Z2H_ALL from .conv_table import Z2H_D as Z2H_D from .conv_table import Z2H_DK as Z2H_DK from .conv_table import Z2H_K as Z2H_K consonants: Incomplete def _exclude_ignorechar( ignore: str, conv_map: typing.Dict[int, str] ) -> typing.Dict[int, str]: ... def _convert(text: str, conv_map: typing.Dict[int, str]) -> str: ... def _translate(text: str, ignore: str, conv_map: typing.Dict[int, str]) -> str: ... def hira2kata(text: str, ignore: str = '') -> str: ... def hira2hkata(text: str, ignore: str = '') -> str: ... def kata2hira(text: str, ignore: str = '') -> str: ... def enlargesmallkana(text: str, ignore: str = '') -> str: ... def enlarge_smallkana(text: str, ignore: str = '') -> str: ... def h2z( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def hankaku2zenkaku( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def han2zen( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def z2h( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def zenkaku2hankaku( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def zen2han( text: str, ignore: str = '', kana: bool = True, ascii: bool = False, digit: bool = False, ) -> str: ... def normalize( text: str, mode: typing.Literal['NFC', 'NFD', 'NFKC', 'NFKD'] = 'NFKC' ) -> str: ... def kana2alphabet(text: str) -> str: ... def kata2alphabet(text: str) -> str: ... def alphabet2kana(text: str) -> str: ... def alphabet2kata(text: str) -> str: ... def hiragana2julius(text: str) -> str: ... ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1721924886.0 jaconv-0.5.0/jaconv/py.typed0000644000076500000240000000000014650476426015220 0ustar00yukinostaff././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1770549336.7089515 jaconv-0.5.0/jaconv.egg-info/0000755000076500000240000000000015142070131015201 5ustar00yukinostaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549336.0 jaconv-0.5.0/jaconv.egg-info/PKG-INFO0000644000076500000240000002124415142070130016300 0ustar00yukinostaffMetadata-Version: 2.4 Name: jaconv Version: 0.5.0 Summary: Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, Zenkaku and more Home-page: https://github.com/ikegami-yukino/jaconv Author: Yukino Ikegami Author-email: yknikgm@gmail.com License: MIT License Keywords: Japanese converter,Japanese,text preprocessing,half-width kana,Hiragana,Katakana,Hankaku,Zenkaku,transliteration,Julius Platform: POSIX Platform: Windows Platform: Unix Platform: MacOS Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Information Technology Classifier: Natural Language :: Japanese Classifier: Operating System :: MacOS Classifier: Operating System :: Microsoft Classifier: Operating System :: POSIX Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3.14 Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General License-File: LICENSE Dynamic: description Dynamic: license-file Dynamic: summary jaconv ========== |version| |pyversion| |license| |download| |usedby| |githubstars| |nowar| |nonuke| jaconv (Japanese Converter) is interconverter for Hiragana, Katakana, Hankaku (half-width character) and Zenkaku (full-width character) `Japanese README `_ is available. INSTALLATION ============== :: $ pip install jaconv USAGE ============ See also `document `_ .. code:: python import jaconv # Hiragana to Katakana jaconv.hira2kata('ともえまみ') # => 'トモエマミ' # Hiragana to half-width Katakana jaconv.hira2hkata('ともえまみ') # => 'トモエマミ' # Katakana to Hiragana jaconv.kata2hira('巴マミ') # => '巴まみ' # half-width character to full-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.h2z('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # half-width character to full-width character # but only ascii characters jaconv.h2z('abc', kana=False, ascii=True, digit=False) # => 'abc' # half-width character to full-width character # but only digit characters jaconv.h2z('123', kana=False, ascii=False, digit=True) # => '123' # half-width character to full-width character # except half-width Katakana jaconv.h2z('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of h2z jaconv.hankaku2zenkaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # full-width character to half-width character # default parameters are followings: kana=True, ascii=False, digit=False jaconv.z2h('ティロ・フィナーレ') # => 'ティロ・フィナーレ' # full-width character to half-width character # but only ascii characters jaconv.z2h('abc', kana=False, ascii=True, digit=False) # => 'abc' # full-width character to half-width character # but only digit characters jaconv.z2h('123', kana=False, ascii=False, digit=True) # => '123' # full-width character to half-width character # except full-width Katakana jaconv.z2h('アabc123', kana=False, digit=True, ascii=True) # => 'アabc123' # an alias of z2h jaconv.zenkaku2hankaku('ティロ・フィナーレabc123') # => 'ティロ・フィナーレabc123' # normalize jaconv.normalize('ティロ・フィナ〜レ', 'NFKC') # => 'ティロ・フィナーレ' # Convert small Hiragana or Katakana to normal size jaconv.enlarge_smallkana('わぁい') # => 'わあい' jaconv.enlarge_smallkana('きょういっぱい', ignore='っ') # => 'きよういっぱい' # Hiragana to alphabet jaconv.kana2alphabet('じゃぱん') # => 'japan' # Alphabet to Hiragana jaconv.alphabet2kana('japan') # => 'じゃぱん' # Katakana to Alphabet jaconv.kata2alphabet('ケツイ') # => 'ketsui' # Alphabet to Katakana jaconv.alphabet2kata('namba') # => 'ナンバ' # Hiragana to Julius's phoneme format jaconv.hiragana2julius('てんきすごくいいいいいい') # => 't e N k i s u g o k u i:' NOTE ============ jaconv.normalize method expand unicodedata.normalize for Japanese language processing. .. code:: '〜' => 'ー' '~' => 'ー' "’" => "'" '”'=> '"' '“' => '``' '―' => '-' '‐' => '-' '˗' => '-' '֊' => '-' '‐' => '-' '‑' => '-' '‒' => '-' '–' => '-' '⁃' => '-' '⁻' => '-' '₋' => '-' '−' => '-' '﹣' => 'ー' '-' => 'ー' '—' => 'ー' '―' => 'ー' '━' => 'ー' '─' => 'ー' .. |pyversion| image:: https://img.shields.io/pypi/pyversions/jaconv.svg .. |version| image:: https://img.shields.io/pypi/v/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: latest version .. |license| image:: https://img.shields.io/pypi/l/jaconv.svg :target: http://pypi.python.org/pypi/jaconv/ :alt: license .. |download| image:: https://static.pepy.tech/personalized-badge/jaconv?period=total&units=international_system&left_color=black&right_color=blue&left_text=Downloads :target: https://pepy.tech/project/jaconv :alt: download .. |usedby| image:: https://img.shields.io/github/search?query=import%20jaconv%20language%3Apython&label=Used%20in%20GitHub :target: https://github.com/search?q=import+jaconv+language%3Apython&type=code :alt: GitHub code search count .. |githubstars| image:: https://img.shields.io/github/stars/ikegami-yukino/jaconv :target: https://github.com/ikegami-yukino/jaconv :alt: GitHub Repo stars .. |nowar| image:: https://img.shields.io/badge/%F0%9F%A4%9D%20NO%20WAR-FF0000?style=plastic :alt: NO WAR budge .. |nonuke| image:: https://img.shields.io/badge/%E2%98%A2%20NO%20NUKE-FFFF00?style=plastic :alt: NO NUKE budge CHANGES ======= 0.5.0 (2026-02-08) ------------------- - Add new func. enlarge_smallkana. - The alphabet2kana func. converts "si" to "し". - Fix conversion bugs in alphabet2kana and kana2alphabet func. - Fix bugs about small-kana in kana2alphabet func. - Add docstring to alias functions 0.4.1 (2025-11-30) ------------------- - port static configs to setup.cfg (thanks @eli-schwartz) - migrate testsuite to pytest (thanks @eli-schwartz) - Support Python 3.13 and 3.14 0.4.0 (2024-07-26) ------------------- - Support Python 3.12 - Add stub files according to PEP 561 for mypy (thanks @ernix) 0.3.4 (2023-02-18) ------------------- - Fix to support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Support Python 3.11 0.3.3 (2022-12-31) ------------------- - Support Python 3.10 - Re-support Python2.7 ~ 3.4 (thanks @manjuu-eater) - Fix z2h, h2z all flag off bug (thanks @manjuu-eater) 0.3.1 (2022-12-14) ------------------- - Fix alpha2kana infinite loop bug (thanks @frog42) 0.3 (2021-03-29) ------------------- - Fix bug (alphabet2kana) thanks @Cuddlemuffin007 - Support Python 3.8 and 3.9 - Add handy functions: alphabet2kata and kata2alphabet. thanks @kokimame - Add function for julius: hiragana2julius 0.2.4 (2018-02-04) ------------------- - Fix bug (kana2alphabet) - Support Python 3.7 - No longer support Python 2.6 - Add aliases of z2h -> zenkaku2hankaku and h2z -> hankaku2zenkaku 0.2.3 (2018-02-03) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) thanks @letuananh 0.2.2 (2018-01-22) ------------------- - Fix bug (kana2alphabet) thanks @kokimame - Support Python 3.6 0.2.1 (2017-09-14) ------------------- - Fix bugs (alphabet2kana, kana2alphabet) 0.2 (2015-04-02) ------------------ - Change module name jctconv -> jaconv - Add alphabet and hiragana interconvert (alphabet2kana, kana2alphabet) 0.1.1 (2015-03-12) ------------------ - Support Windows - Support Python 3.5 0.1 (2014-11-24) ------------------ - Add some Japanese characters to convert table (ゝゞ・「」。、) - Decresing memory usage - Some function names are deprecated (hankaku2zenkaku, zenkaku2hankaku, H2K, H2hK, K2H) 0.0.7 (2014-03-22) ------------------ z2h and h2z allow mojimoji-like target character type determination. Bug fix about Half Kana conversion. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549336.0 jaconv-0.5.0/jaconv.egg-info/SOURCES.txt0000644000076500000240000000057215142070130017070 0ustar00yukinostaffCHANGES.rst LICENSE MANIFEST.in README.rst README_JP.rst setup.cfg setup.py test_jaconv.py jaconv/__init__.py jaconv/__init__.pyi jaconv/compat.py jaconv/compat.pyi jaconv/conv_table.py jaconv/conv_table.pyi jaconv/jaconv.py jaconv/jaconv.pyi jaconv/py.typed jaconv.egg-info/PKG-INFO jaconv.egg-info/SOURCES.txt jaconv.egg-info/dependency_links.txt jaconv.egg-info/top_level.txt././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549336.0 jaconv-0.5.0/jaconv.egg-info/dependency_links.txt0000644000076500000240000000000115142070130021246 0ustar00yukinostaff ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770549336.0 jaconv-0.5.0/jaconv.egg-info/top_level.txt0000644000076500000240000000000715142070130017727 0ustar00yukinostaffjaconv ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1770549336.7101278 jaconv-0.5.0/setup.cfg0000644000076500000240000000414315142070131014052 0ustar00yukinostaff[metadata] name = jaconv version = attr: jaconv.__version__ license = MIT License platforms = POSIX, Windows, Unix, MacOS author = Yukino Ikegami author_email = yknikgm@gmail.com url = https://github.com/ikegami-yukino/jaconv keywords = Japanese converter Japanese text preprocessing half-width kana Hiragana Katakana Hankaku Zenkaku transliteration Julius classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers Intended Audience :: Information Technology Natural Language :: Japanese Operating System :: MacOS Operating System :: Microsoft Operating System :: POSIX Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Programming Language :: Python :: 3.4 Programming Language :: Python :: 3.5 Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.12 Programming Language :: Python :: 3.13 Programming Language :: Python :: 3.14 Topic :: Text Processing Topic :: Text Processing :: General [options] packages = jaconv [options.package_data] jaconv = py.typed, *.pyi [tox:tox] requires = tox>=4 env_list = format doc_format build 3.14 3.13 3.12 3.11 3.10 3.9 parallel_show_output = true [testenv] description = "run unit tests" deps = pytest>=8 pytest-sugar commands = python test_jaconv.py [testenv:format] description = "install ruff in a virtual environment and invoke it" deps = ruff>=0.14 skip_install = true commands = ruff check --select I --diff --fix jaconv ruff format --config "format.quote-style='single'" [testenv:doc_format] description = "Check rst format" deps = readme-renderer[md] skip_install = true commands = python -m readme_renderer README.rst -o /dev/null python -m readme_renderer README_JP.rst -o /dev/null python -m readme_renderer CHANGES.rst -o /dev/null [testenv:build] description = "Build the package" deps = build commands = python -m build [egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/setup.py0000644000076500000240000000056115142065620013752 0ustar00yukinostaff# -*- coding: utf-8 -*- from setuptools import setup from jaconv.compat import open setup( description='Pure-Python Japanese character interconverter for ' 'Hiragana, Katakana, Hankaku, Zenkaku and more', long_description='%s\n\n%s' % ( open('README.rst', encoding='utf8').read(), open('CHANGES.rst', encoding='utf8').read(), ), ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1770548112.0 jaconv-0.5.0/test_jaconv.py0000644000076500000240000002136715142065620015140 0ustar00yukinostaff# -*- coding: utf-8 -*- from __future__ import unicode_literals from functools import partial import jaconv HIRAGANA = ( 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞた', 'だちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽま', 'みむめもゃやゅゆょよらりるれろわをんーゎゐゑゕゖゔゝゞ・「」。、', ) FULL_KANA = ( 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタ', 'ダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマ', 'ミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴヽヾ・「」。、', ) HALF_KANA = ( 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタ', 'ダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマ', 'ミムメモャヤュユョヨラリルレロワヲンーヮヰヱヵヶヴヽヾ・「」。、', ) HALF_ASCII = ( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`~', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz{|} ', ) HALF_DIGIT = '0123456789' FULL_ASCII = ( '!"#$%&'()*+,-./:;<=>?@[\]^_`~', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz{|} ', ) FULL_DIGIT = '0123456789' def _compare(mathod, lhs, rhs): for i in range(len(lhs)): assert mathod(lhs[i]) == rhs[i] def _concat(*iterables): result = '' for iterable in iterables: result += ''.join(iterable) return result def test_hira2kata(): assert jaconv.hira2kata('ともえまみ') == 'トモエマミ' assert jaconv.hira2kata('まどまぎ', ignore='ど') == 'マどマギ' _compare(jaconv.hira2kata, HIRAGANA, FULL_KANA) def test_hira2hkata(): assert jaconv.hira2hkata('ともえまみ') == 'トモエマミ' assert jaconv.hira2hkata('ともえまみ', ignore='み') == 'トモエマみ' _compare(jaconv.hira2hkata, HIRAGANA, HALF_KANA) def test_kata2hira(): assert jaconv.kata2hira('巴マミ') == '巴まみ' assert jaconv.kata2hira('マミサン', ignore='ン') == 'まみさン' _compare(jaconv.kata2hira, FULL_KANA, HIRAGANA) def test_h2z(): assert jaconv.h2z('ティロフィナーレ') == 'ティロフィナーレ' assert jaconv.h2z('ティロフィナーレ', ignore='ィ') == 'ティロフィナーレ' _compare(jaconv.h2z, HALF_KANA, FULL_KANA) _compare(partial(jaconv.h2z, ascii=True), HALF_ASCII, FULL_ASCII) _compare(partial(jaconv.h2z, digit=True), HALF_DIGIT, FULL_DIGIT) for ascii in (True, False): for digit in (True, False): for kana in (True, False): before = _concat( FULL_KANA, HALF_KANA, FULL_ASCII, HALF_ASCII, FULL_DIGIT, HALF_DIGIT ) after = _concat( FULL_KANA, FULL_KANA if kana else HALF_KANA, FULL_ASCII, FULL_ASCII if ascii else HALF_ASCII, FULL_DIGIT, FULL_DIGIT if digit else HALF_DIGIT, ) converted = jaconv.h2z(before, ascii=ascii, digit=digit, kana=kana) assert converted == after def test_z2h(): assert jaconv.z2h('ティロフィナーレ') == 'ティロフィナーレ' assert jaconv.z2h('ティロフィナーレ', ignore='ィ') == 'ティロフィナーレ' _compare(partial(jaconv.z2h, kana=True), FULL_KANA, HALF_KANA) _compare(partial(jaconv.z2h, ascii=True), FULL_ASCII, HALF_ASCII) _compare(partial(jaconv.z2h, digit=True), FULL_DIGIT, HALF_DIGIT) for ascii in (True, False): for digit in (True, False): for kana in (True, False): before = _concat( FULL_KANA, HALF_KANA, FULL_ASCII, HALF_ASCII, FULL_DIGIT, HALF_DIGIT ) after = _concat( HALF_KANA if kana else FULL_KANA, HALF_KANA, HALF_ASCII if ascii else FULL_ASCII, HALF_ASCII, HALF_DIGIT if digit else FULL_DIGIT, HALF_DIGIT, ) converted = jaconv.z2h(before, ascii=ascii, digit=digit, kana=kana) assert converted == after def test_normalize(): assert jaconv.normalize('ティロ・フィナ〜レ', 'NFKC') == 'ティロ・フィナーレ' assert jaconv.normalize(_concat(HALF_KANA, FULL_DIGIT), 'NFKC') == ''.join( FULL_KANA ) + ''.join(HALF_DIGIT) def test_kana2alphabet(): assert jaconv.kana2alphabet('まみさん') == 'mamisan' assert jaconv.kana2alphabet('はっとり') == 'hattori' assert jaconv.kana2alphabet('はっ') == 'haxtsu' assert jaconv.kana2alphabet('ぽっ') == 'poxtsu' assert jaconv.kana2alphabet('ふぁふぃふぇふぉ') == 'fafifefo' assert jaconv.kana2alphabet('っって') == 'xtsutte' assert jaconv.kana2alphabet('ゔぃーた') == 'vi-ta' # Single small kana assert jaconv.kana2alphabet('ゅ') == 'yu' assert jaconv.kana2alphabet('ゃ') == 'ya' assert jaconv.kana2alphabet('ょ') == 'yo' # Small kana at start assert jaconv.kana2alphabet('ゅい') == 'yui' # Small kana in middle (non-standard pattern) assert jaconv.kana2alphabet('さくらゅい') == 'sakurayui' # Valid 拗音 patterns should still work assert jaconv.kana2alphabet('きゅ') == 'kyu' assert jaconv.kana2alphabet('りゅう') == 'ryuu' def text_kata2alphabet(): assert jaconv.kata2alphabet('マミサン') == 'mamisan' assert jaconv.kata2alphabet('ドッグ ドグー') == 'doggu doguu' assert jaconv.kata2alphabet('ボッチ') == 'botchi' assert jaconv.kata2alphabet('ファイナルファンタジー') == 'fainarufantaji-' assert jaconv.kata2alphabet('アツイ') == 'atsui' assert jaconv.kata2alphabet('イトウ') == 'itoh' assert jaconv.kata2alphabet('オオタク') == 'ohtaku' assert jaconv.kata2alphabet('ナンバ') == 'namba' assert jaconv.kata2alphabet('シバ') == 'siba' assert jaconv.kata2alphabet('シバ') == 'shiba' assert jaconv.kata2alphabet('ハンエイ') == 'hannei' # Small kana at start assert jaconv.kana2alphabet('ュイ') == 'yui' # Small kana in middle (non-standard pattern) assert jaconv.kana2alphabet('サクラュイ') == 'sakurayui' # Valid 拗音 patterns should still work assert jaconv.kana2alphabet('キュ') == 'kyu' assert jaconv.kana2alphabet('リュウ') == 'ryuu' def test_alphabet2kana(): assert jaconv.alphabet2kana('mamisan') == 'まみさん' assert jaconv.alphabet2kana('doggu doguu') == 'どっぐ どぐう' assert jaconv.alphabet2kana('botchi') == 'ぼっち' assert jaconv.alphabet2kana('fainarufantaji-') == 'ふぁいなるふぁんたじー' assert jaconv.alphabet2kana('atsui') == 'あつい' assert jaconv.alphabet2kana('itoh') == 'いとう' assert jaconv.alphabet2kana('ohtaku') == 'おおたく' assert jaconv.alphabet2kana('namba') == 'なんば' assert jaconv.alphabet2kana('siba') == 'しば' assert jaconv.alphabet2kana('shiba') == 'しば' assert jaconv.alphabet2kana('hannei') == 'はんえい' def test_alphabet2julius(): assert jaconv.hiragana2julius('てんき') == 't e N k i' assert jaconv.hiragana2julius('やったー') == 'y a q t a:' assert jaconv.hiragana2julius('かわいいいいい') == 'k a w a i:' assert jaconv.hiragana2julius('やろうぜ') == 'y a r o: z e' assert ( jaconv.hiragana2julius('てんきすごくいいいいいい') == 't e N k i s u g o k u i:' ) def test_enlarge_smallkana(): assert jaconv.enlarge_smallkana('キュゥべえ') == 'キユウべえ' assert jaconv.enlarge_smallkana('しゃえい') == 'しやえい' assert jaconv.enlarge_smallkana('しゅみ') == 'しゆみ' assert jaconv.enlarge_smallkana('きょういっぱい') == 'きよういつぱい' assert jaconv.enlarge_smallkana('霞ヶ関') == '霞ケ関' assert jaconv.enlarge_smallkana('一ヵ月') == '一カ月' assert jaconv.enlarge_smallkana('シャトー') == 'シヤトー' assert jaconv.enlarge_smallkana('チューリップ') == 'チユーリツプ' assert jaconv.enlarge_smallkana('ショート') == 'シヨート' assert jaconv.enlarge_smallkana('きょういっぱい', 'っ') == 'きよういっぱい' assert jaconv.enlarge_smallkana('きょういっぱい', 'ょっ') == 'きょういっぱい'