MIME-EcoEncode-0.95/0000755000076400007640000000000012241053107013527 5ustar murata0murata0MIME-EcoEncode-0.95/t/0000755000076400007640000000000012241053107013772 5ustar murata0murata0MIME-EcoEncode-0.95/t/EcoEncode.t0000644000076400007640000002564112221155312016012 0ustar murata0murata0#!/usr/bin/perl -w # This script is written in utf8 use strict; use warnings; use Test::More tests => 61; #use Test::More 'no_plan'; BEGIN { use_ok('MIME::EcoEncode') }; use Encode; use MIME::EcoEncode; my $str; my $encoded; $str = 'test0'; is(mime_eco($str, 'UTF-8'), $str, 'ASCII (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), $str, 'ASCII (ISO-2022-JP)'); $str = "test0\n"; is(mime_eco($str, 'UTF-8'), $str, 'ASCII . "\n" (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), $str, 'ASCII . "\n" (ISO-2022-JP)'); $str = 'a' x 80; is(mime_eco($str, 'UTF-8'), $str, 'ASCII x 80 (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), $str, 'ASCII x 80 (ISO-2022-JP)'); $str = 'Subject: ' . 'a' x 80; is(mime_eco($str, 'UTF-8'), $str, "\'Subject: \'" . ' . ASCII x 80 (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), $str, "\'Subject: \'" . ' . ASCII x 80 (ISO-2022-JP)'); $str = ' ' x 80; is(mime_eco($str, 'UTF-8'), $str, 'SP x 80 (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), $str, 'SP x 80 (ISO-2022-JP)'); $str = ('a' x 80 . ' ') x 3; is(mime_eco($str, 'UTF-8'), 'a' x 80 . ' ' . 'a' x 80 . "\n " . 'a' x 80 . ' ', '(ASCII x 80 + SP) x 3 (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), 'a' x 80 . ' ' . 'a' x 80 . "\n " . 'a' x 80 . ' ', '(ASCII x 80 + SP) x 3 (ISO-2022-JP)'); $str = '日本語あいうえおアイウエオ' x 2; is(mime_eco($str, 'UTF-8'), "=?UTF-8?B?" . "5pel5pys6Kqe44GC44GE44GG44GI44GK44Ki44Kk44Km44Ko44Kq5pel5pys6Kqe?=\n" . " =?UTF-8?B?44GC44GE44GG44GI44GK44Ki44Kk44Km44Ko44Kq?=", 'non-ASCII only (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), "=?ISO-2022-JP?B?" . "GyRCRnxLXDhsJCIkJCQmJCgkKiUiJSQlJiUoJSpGfEtcOGwkIiQkGyhC?=\n" . " =?ISO-2022-JP?B?GyRCJCYkKCQqJSIlJCUmJSglKhsoQg==?=", 'non-ASCII only (ISO-2022-JP)'); $str = ' ' . 'あ' . ' '; is(mime_eco($str, 'UTF-8'), ' =?UTF-8?B?44GC?= ', 'SP SP non-ASCII SP SP (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), ' =?ISO-2022-JP?B?GyRCJCIbKEI=?= ', 'SP SP non-ASCII SP SP (ISO-2022-JP)'); $str = ' ' . 'あ' . ' '; is(mime_eco($str, 'UTF-8', undef, undef, undef, 0), '=?UTF-8?B?ICDjgYIgIA==?=', '$lss=0 (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP', undef, undef, undef, 0), '=?ISO-2022-JP?B?ICAbJEIkIhsoQiAg?=', '$lss=0 (ISO-2022-JP)'); $str = " Subject: Re: [XXXX 0123] Re: アa イi ウu A-I-U\n"; is(mime_eco($str, 'UTF-8'), " Subject: Re: [XXXX 0123] Re: =?UTF-8?B?44KiYSAg44KkaSAg44KmdQ==?=" . " \n A-I-U\n", 'SP SP ASCII non-ASCII . "\n" (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), " Subject: Re: [XXXX 0123] Re: =?ISO-2022-JP?B?GyRCJSIbKEJhICA=?=" . "\n =?ISO-2022-JP?B?GyRCJSQbKEJpICAbJEIlJhsoQnU=?= A-I-U\n", 'SP SP ASCII non-ASCII . "\n" (ISO-2022-JP)'); $str = 'Subject: あいうえお アイウエオ アイウエオ A-I-U-E-O'; is(mime_eco($str, 'UTF-8'), "Subject: " . "=?UTF-8?B?44GC44GE44GG44GI44GKIOOCouOCpOOCpuOCqOOCqiDvvbHvvbI=?=\n" . " =?UTF-8?B?772z77207721?= A-I-U-E-O", 'ASCII non-ASCII HankakuKana (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), "Subject: " . "=?ISO-2022-JP?B?GyRCJCIkJCQmJCgkKhsoQiAbJEIlIiUkJSYlKCUqGyhCIA==?=\n" . " =?ISO-2022-JP?B?GyhJMTIzNDUbKEI=?= A-I-U-E-O", 'ASCII non-ASCII HankakuKana (ISO-2022-JP)'); $str = 'Subject: Re: あ A い I'; is(mime_eco($str, 'UTF-8', "|\n", 17), "Subject: Re:|\n =?UTF-8?B?44GC?=|\n A|\n =?UTF-8?B?44GE?=|\n I", '$lf="|\n", $bpl=17 (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP', "|\n", 31), "Subject: Re:|\n =?ISO-2022-JP?B?GyRCJCIbKEI=?=|\n A|\n" . " =?ISO-2022-JP?B?GyRCJCQbKEI=?=|\n I", '$lf="|\n", $bpl=31 (ISO-2022-JP)'); $str = 'Subject: Re: あ A い I う U え E お O'; is(mime_eco($str, 'UTF-8'), "Subject: " . "Re: =?UTF-8?B?44GC?= A =?UTF-8?B?44GE?= I =?UTF-8?B?44GG?= U\n" . " =?UTF-8?B?44GI?= E =?UTF-8?B?44GK?= O", 'ASCII non-ASCII (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), "Subject: " . "Re: =?ISO-2022-JP?B?GyRCJCIbKEI=?= A =?ISO-2022-JP?B?GyRCJCQbKEI=?=\n" . " I =?ISO-2022-JP?B?GyRCJCYbKEI=?= U =?ISO-2022-JP?B?GyRCJCgbKEI=?= E\n" . " =?ISO-2022-JP?B?GyRCJCobKEI=?= O", 'ASCII non-ASCII (ISO-2022-JP)'); $str = "Subject:\tア a\tイ i\tウ u\t\tA-I-U"; is(mime_eco($str, 'UTF-8'), "Subject:\t=?UTF-8?B?44Ki?= a\t=?UTF-8?B?44Kk?= i\t=?UTF-8?B?44Km?= u" . "\t\tA-I-U", 'ASCII non-ASCII . "\t" (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), "Subject:\t=?ISO-2022-JP?B?GyRCJSIbKEI=?= a" . "\t=?ISO-2022-JP?B?GyRCJSQbKEI=?= i\n" . "\t=?ISO-2022-JP?B?GyRCJSYbKEI=?= u\t\tA-I-U", 'ASCII non-ASCII . "\t" (ISO-2022-JP)'); $str = 'Subject: ' . 'x' x 50 . ' ' x 50 . 'あ'; is(mime_eco($str, 'UTF-8'), 'Subject: ' . 'x' x 50 . ' ' x 49 . "\n =?UTF-8?B?44GC?=", 'Long SP (UTF-8)'); is(mime_eco($str, 'ISO-2022-JP'), 'Subject: ' . 'x' x 50 . ' ' x 49 . "\n =?ISO-2022-JP?B?44GC?=", 'Long SP (ISO-2022-JP)'); my $from = 'From: OKAZAKI Sakurako '; $str = $from . ' (岡崎 桜子)'; is(mime_eco($str, 'UTF-8'), $from . ' (=?UTF-8?B?5bKh5bSOIOahnA==?=' . "\n " . '=?UTF-8?B?5a2Q?=)', 'structured header (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), $from . ' (=?ISO-2022-JP?B?GyRCMiwbKEI=?=' . "\n " . '=?ISO-2022-JP?B?GyRCOmobKEIgGyRCOnk7UhsoQg==?=)', 'structured header (ISO-2022-JP)'); $str = ' (あ) (((あ))) (あ (あ)) (あ (あ)) '; is(mime_eco($str, 'UTF-8'), ' (=?UTF-8?B?44GC?=) (((=?UTF-8?B?44GC?=))) (=?UTF-8?B?44GC?= ' . "\n" . ' (=?UTF-8?B?44GC?=)) (=?UTF-8?B?44GC?= (=?UTF-8?B?44GC?=)) ', 'comment in comment (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), ' (=?ISO-2022-JP?B?GyRCJCIbKEI=?=)' . ' (((=?ISO-2022-JP?B?GyRCJCIbKEI=?=))) ' . "\n" . ' (=?ISO-2022-JP?B?GyRCJCIbKEI=?= (=?ISO-2022-JP?B?GyRCJCIbKEI=?=)) ' . "\n" . ' (=?ISO-2022-JP?B?GyRCJCIbKEI=?= (=?ISO-2022-JP?B?GyRCJCIbKEI=?=)) ', 'comment in comment (ISO-2022-JP)'); $str = 'From: Sakura (桜桜 (岡崎))'; is(mime_eco($str), 'From: Sakura (=?UTF-8?B?5qGc5qGc?= (=?UTF-8?B?5bKh?=' . "\n" . ' =?UTF-8?B?5bSO?=))', 'comment in comment (2) (UTF-8)'); is(mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'), 'From: Sakura (=?ISO-2022-JP?B?GyRCOnk6eRsoQg==?=' . "\n" . ' (=?ISO-2022-JP?B?GyRCMiw6ahsoQg==?=))', 'comment in comment (2) (ISO-2022-JP)'); $str = 'From: A B ' . "(\xa9" . '!"#$%&\'()*+,-./:;<=>?@[\]^_`{|}~)'; is(mime_eco($str, 'UTF-8?Q'), 'From: A B ' . '(=?UTF-8?Q?=A9!=22=23=24=25=26=27=28=29*+=2C-=2E?=' . "\n" . ' =?UTF-8?Q?/=3A=3B=3C=3D=3E=3F=40=5B=5C=5D=5E=5F=60=7B=7C=7D=7E?=)', 'structured header (UTF-8?Q)'); $str = 'Subject: ' . "\xa9" . '!"#$%&\'()*+,-./:;<=>?@[\]^_`{|}~'; is(mime_eco($str, 'UTF-8?Q'), 'Subject: =?UTF-8?Q?=A9!"#$%&\'()*+,-./:;<=3D>=3F@[\]^=5F`{|}~?=', 'unstructured header (UTF-8?Q)'); $str = 'Subject: ' . "\x47\x72\xfc\xdf\x65"; is(mime_eco($str, 'ISO-8859-1'), 'Subject: =?ISO-8859-1?B?R3L832U=?=', 'ISO-8859-1'); is(mime_eco($str, 'ISO-8859-1?Q'), 'Subject: =?ISO-8859-1?Q?Gr=FC=DFe?=', 'ISO-8859-1?Q'); $str = 'Subject:' . " Hello \x47\x72\xfc\xdf\x65" x 3; $encoded = mime_eco($str, 'ISO-8859-15?Q'); is($encoded, 'Subject: Hello =?ISO-8859-15?Q?Gr=FC=DFe?= Hello ' . '=?ISO-8859-15?Q?Gr=FC=DFe?=' . "\n" . ' Hello =?ISO-8859-15?Q?Gr=FC=DFe?=', 'ISO-8859-15?Q'); is(mime_deco($encoded), $str, 'ISO-8859-15?Q (decode)'); $str = 'Subject: ' . "\xe4\xbd\xa0\xe5\xa5\xbd\xe3\x80\x82"; $encoded = mime_eco(encode('euc-cn', decode_utf8($str)), 'GB2312'); is($encoded, 'Subject: =?GB2312?B?xOO6w6Gj?=', 'GB2312'); is(encode_utf8(decode('euc-cn', mime_deco($encoded))), $str, 'GB2312 (decode)'); $str = 'Subject: ' . "\xbe\xc8\xb3\xe7\xc7\xcf\xbd\xca\xb4\xcf\xb1\xee\x3f"; $encoded = mime_eco($str, 'EUC-KR'); is($encoded, 'Subject: =?EUC-KR?B?vsiz58fPvcq0z7HuPw==?=', 'EUC-KR'); is(mime_deco($encoded), $str, 'EUC-KR (decode)'); $str = 'Subject: ' . "\xe4\xbd\xa0\xe5\xa5\xbd\xe3\x80\x82"; $encoded = mime_eco(encode('big5', decode_utf8($str)), 'Big5'); is($encoded, 'Subject: =?Big5?B?p0GmbqFD?=', 'Big5'); is(encode_utf8(decode('big5', mime_deco($encoded))), $str, 'Big5 (decode)'); $str = "From: Sakura (\xe6\xa1\x9c)"; is(mime_eco($str, 'UTF-8*ja-JP?Q'), 'From: Sakura (=?UTF-8*ja-JP?Q?=E6=A1=9C?=)', 'RFC2231\'s language'); $str = "Subject: Sakura (\xe6\xa1\x9c)" . " Hello (\xe4\xbd\xa0\xe5\xa5\xbd\xe3\x80\x82)"; $encoded = "Subject: Sakura =?ISO-2022-JP*ja?B?KBskQjp5GyhCKQ==?=" . " Hello =?GB2312?B?KMTjusOhoyk=?="; is(mime_deco($encoded, \&cb), $str, 'decode (multiple charsets)'); sub cb { my ($encoded_word, $charset, $language, $decoded_word) = @_; encode_utf8(decode($charset, $decoded_word)); } $str = "Subject: あ\tあ"; is(mime_eco($str, 'UTF-8?Q'), 'Subject: =?UTF-8?Q?=E3=81=82=09=E3=81=82?=', 'UTF-8?Q + TAB'); $str = "Subject: あ ) (あ"; $encoded = mime_eco($str, 'UTF-8?Q'); is(mime_deco($encoded), $str, 'UTF-8?Q + parenthesis'); my $str_j; $str = 'Subject: AあIいUうEえOお a AあIいU e'; $str_j = encode('7bit-jis', decode_utf8($str)); $encoded = mime_eco($str_j, 'ISO-2022-JP'); is($encoded, "Subject: " . "=?ISO-2022-JP?B?QRskQiQiGyhCSRskQiQkGyhCVRskQiQmGyhCRRskQiQoGyhC?=\n" . " =?ISO-2022-JP?B?TxskQiQqGyhC?= a " . "=?ISO-2022-JP?B?QRskQiQiGyhCSRskQiQkGyhC?=\n" . " =?ISO-2022-JP?B?VQ==?= e", 'ISO-2022-JP encode + folding 1'); is(mime_deco($encoded), $str_j, 'ISO-2022-JP decode + folding 1'); $str = 'Subject: aあaあaあaあ '; $str_j = encode('7bit-jis', decode_utf8($str)); $encoded = mime_eco($str_j, 'ISO-2022-JP'); is($encoded, "Subject: " . "=?ISO-2022-JP?B?YRskQiQiGyhCYRskQiQiGyhCYRskQiQiGyhCYQ==?=\n" . " =?ISO-2022-JP?B?GyRCJCIbKEI=?= ", 'ISO-2022-JP encode + folding 2'); is(mime_deco($encoded), $str_j, 'ISO-2022-JP decode + folding 2'); $str = 'Subject: アあ000000アアあアあ'; $str_j = encode('7bit-jis', decode_utf8($str)); $encoded = mime_eco($str_j, 'ISO-2022-JP'); is($encoded, "Subject: " . "=?ISO-2022-JP?B?GyhJMRskQiQiGyhCMDAwMDAwGyhJMTEbJEIkIhsoSTEbKEI=?=\n" . " =?ISO-2022-JP?B?GyRCJCIbKEI=?=", 'ISO-2022-JP encode + folding 3'); is(mime_deco($encoded), $str_j, 'ISO-2022-JP decode + folding 3'); $str = "Subject:" . ' ' x 200 . 'a あ'; $encoded = mime_eco($str); is($encoded, "Subject:" . ' ' x 200 . "a\n" . ' =?UTF-8?B?44GC?=', 'very long whitespace'); $str = 'Subject: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa あ '; $str_j = encode('7bit-jis', decode_utf8($str)); $encoded = mime_eco($str_j, 'ISO-2022-JP'); is($encoded, "Subject: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" . " =?ISO-2022-JP?B?GyRCJCIbKEI=?= ", 'ISO-2022-JP encode + folding 4'); MIME-EcoEncode-0.95/t/Fold.t0000644000076400007640000000656712240353042015061 0ustar murata0murata0#!/usr/bin/perl -w # This script is written in utf8 use strict; use warnings; use Test::More tests => 14; #use Test::More 'no_plan'; BEGIN { use_ok('MIME::EcoEncode::Fold') }; use Encode; use MIME::EcoEncode::Fold; my $in_utf8; my $out_utf8; my $in; my $out; my $str; $in_utf8 =<<"END"; あああ00000aaaaaaあああ00000aaaaaaアアア00000aaあああ アアア00000aaaaaaaあああ00000aaaaaaaあああ00000aaaaアア 00000aaaaaa00000aaaaaaaaa00000aaaaaaaaaaaaaaa00000 END # # test 2 # $out_utf8 =<<"END"; あああ00000aaaaaaあああ0 0000aaaaaaアアア00000aaあ ああ アアア00000aaaaaaaあああ 00000aaaaaaaあああ00000aaa aアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = $in_utf8; $out = $out_utf8; is(mime_eco_fold($in, 'UTF-8', "\n", 30), $out, 'UTF-8 "\n" 30'); # # test 3 # $out_utf8 =<<"END"; あああ00000aaaaaaあああ0 0000aaaaaaアアア00000aa あああ アアア00000aaaaaaaあああ 00000aaaaaaaあああ00000aa aaアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = $in_utf8; $out = $out_utf8; is(mime_eco_fold($in, 'UTF-8', undef, 30), $out, 'UTF-8 undef 30'); # # test 4 # $out_utf8 =<<"END"; あああ00000aaaaaa あああ00000aaaaaaア アア00000aaあああ アアア00000aaaaaaaあ ああ00000aaaaaaaあ ああ00000aaaaアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = encode('7bit-jis', decode_utf8($in_utf8)); $out = encode('7bit-jis', decode_utf8($out_utf8)); is(mime_eco_fold($in, 'ISO-2022-JP', "\n", 30), $out, 'ISO-2022-JP "\n" 30'); # # test 5 # $out_utf8 =<<"END"; あああ00000aaaaaa あああ00000aaaaaa アアア00000aaあああ アアア00000aaaaaaaあ ああ00000aaaaaaa あああ00000aaaaアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = encode('7bit-jis', decode_utf8($in_utf8)); $out = encode('7bit-jis', decode_utf8($out_utf8)); is(mime_eco_fold($in, 'ISO-2022-JP', undef, 30), $out, 'ISO-2022-JP undef 30'); # # test 6 # $out_utf8 =<<"END"; あああ00000aaaaaaあああ00000aa aaaaアアア00000aaあああ アアア00000aaaaaaaあああ00000aaaa aaaあああ00000aaaaアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = encode('cp932', decode_utf8($in_utf8)); $out = encode('cp932', decode_utf8($out_utf8)); is(mime_eco_fold($in, 'Shift_JIS', "\n", 30), $out, 'Shift_JIS "\n" 30'); # # test 7 # $out_utf8 =<<"END"; あああ00000aaaaaaあああ00000aa aaaaアアア00000aaあああ アアア00000aaaaaaaあああ00000aaaa aaaあああ00000aaaaアア 00000aaaaaa00000aaaaaaaaa00000 aaaaaaaaaaaaaaa00000 END $in = encode('cp932', decode_utf8($in_utf8)); $out = encode('cp932', decode_utf8($out_utf8)); is(mime_eco_fold($in, 'Shift_JIS', undef, 30), $out, 'Shift_JIS undef 30'); $str = " \n "; is(mime_eco_fold($str, 'UTF-8', undef, 30), $str, 'SP + "\n" + SP'); $str = ""; is(mime_eco_fold($str, 'UTF-8', undef, 30), $str, 'zero length'); $str = undef; is(mime_eco_fold($str, 'UTF-8', undef, 30), "", 'undef'); $str = "test0\x0d"; is(mime_eco_fold($str, 'UTF-8', undef, 30), $str, '\x0d'); $str = "test0\x0a"; is(mime_eco_fold($str, 'UTF-8', undef, 30), $str, '\x0a'); $str = "test0\x0d\x0a"; is(mime_eco_fold($str, 'UTF-8', undef, 30), $str, '\x0d\x0a'); $in = "00000aaaaaa00000aaaaaaaaa00000aaaaaaaaaaaaaaa00000"; $out = "00000aaaaaa00000aaaaaaaaa00000\n aaaaaaaaaaaaaaa00000"; is(mime_eco_fold($in, 'UTF-8', undef, 30), $out, 'ASCII'); MIME-EcoEncode-0.95/t/Param.t0000644000076400007640000000635712240616245015241 0ustar murata0murata0#!/usr/bin/perl -w # This script is written in utf8 use strict; use warnings; use Test::More tests => 19; #use Test::More 'no_plan'; BEGIN { use_ok('MIME::EcoEncode::Param') }; use Encode; use MIME::EcoEncode::Param; my $in_utf8; my $out_utf8; my $in; my $out; my $str; my $encoded; $in_utf8 = " filename=あああ000aaあああ000aaアアア000aaあああ\n"; # # test 2, 3 # $in = $in_utf8; $out =<<'END'; filename*0*=UTF-8'ja'%E3%81%82%E3%81%82%E3%81%82; filename*1*=000aa%E3%81%82%E3%81%82%E3%81%82000a; filename*2*=a%EF%BD%B1%EF%BD%B1%EF%BD%B1000aa; filename*3*=%E3%81%82%E3%81%82%E3%81%82 END $encoded = mime_eco_param($in, "UTF-8'ja'", "\n", 50); is($encoded, $out, 'UTF-8\'ja\' "\n" 50'); is(mime_deco_param($encoded), $in, 'UTF-8\'ja\' "\n" 50 - decode'); # # test 4, 5 # $in = encode('7bit-jis', decode_utf8($in_utf8)); $out =<<'END'; filename*0*=ISO-2022-JP''%1B$B$%22$%22%1B%28B; filename*1*=%1B$B$%22%1B%28B000aa; filename*2*=%1B$B$%22$%22$%22%1B%28B000aa; filename*3*=%1B%28I111%1B%28B000aa; filename*4*=%1B$B$%22$%22$%22%1B%28B END $encoded = mime_eco_param($in, "ISO-2022-JP", "\n", 50); is($encoded, $out, 'ISO-2022-JP "\n" 50'); is(mime_deco_param($encoded), $in, 'ISO-2022-JP "\n" 50 - decode'); # # test 6, 7 # $in = encode('cp932', decode_utf8($in_utf8)); $out =<<'END'; filename*0*=Shift_JIS''%82%A0%82%A0%82%A0000aa; filename*1*=%82%A0%82%A0%82%A0000aa%B1%B1%B1000a; filename*2*=a%82%A0%82%A0%82%A0 END $encoded = mime_eco_param($in, "Shift_JIS", "\n", 50); is($encoded, $out, 'Shift_JIS "\n" 50'); is(mime_deco_param($encoded), $in, 'Shift_JIS "\n" 50 - decode'); # # RFC 2231 Section 3 example # $in =<<'END'; URL*0="ftp://"; URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar" END $out =<<'END'; URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar" END is(mime_deco_param($in), $out, 'decode 1'); # # RFC 2231 Section 4 example # $in =<<'END'; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A END $out =<<'END'; title=This is ***fun*** END is(mime_deco_param($in), $out, 'decode 2'); # # RFC 2231 Section 4.1 example # $in =<<'END'; title*0*=us-ascii'en'This%20is%20even%20more%20 title*1*=%2A%2A%2Afun%2A%2A%2A%20 title*2="isn't it!" END $out =<<'END'; title="This is even more ***fun*** isn't it!" END is(mime_deco_param($in), $out, 'decode 3'); # # RFC 2231 Section 4.1 example (corrected version, Errata ID: 590) # $in =<<'END'; title*0*=us-ascii'en'This%20is%20even%20more%20; title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!" END $out =<<'END'; title="This is even more ***fun*** isn't it!" END is(mime_deco_param($in), $out, 'decode 4'); $str = " \n "; is(mime_eco_param($str), ' ', 'SP + "\n" + SP'); $str = ""; is(mime_eco_param($str), $str, 'zero length'); $str = undef; is(mime_eco_param($str), "", 'undef'); $str = "test0\x0d"; is(mime_eco_param($str), $str, '\x0d'); $str = "test0\x0a"; is(mime_eco_param($str), $str, '\x0a'); $str = "test0\x0d\x0a"; is(mime_eco_param($str), $str, '\x0d\x0a'); $str = " name=\"=?UTF-8?B?5a+M5aOr5bGxXzIwMTMuanBlZw==?=\""; is(mime_deco_param($str, 0), $str, 'decode B/Q - OFF'); $in = " name=\"=?UTF-8?B?5a+M5aOr5bGxXzIwMTMuanBlZw==?=\""; $out = " name=\"\xe5\xaf\x8c\xe5\xa3\xab\xe5\xb1\xb1_2013.jpeg\""; is(mime_deco_param($in, 1), $out, 'decode B/Q - ON'); MIME-EcoEncode-0.95/Makefile.PL0000644000076400007640000000114712241052575015514 0ustar murata0murata0use 5.008005; use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( NAME => 'MIME::EcoEncode', VERSION_FROM => 'lib/MIME/EcoEncode.pm', # finds $VERSION LICENSE => 'perl', PREREQ_PM => {MIME::Base64 => 3.01}, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/MIME/EcoEncode.pod', # retrieve abstract from module AUTHOR => 'MURATA Yasuhisa ') : ()), ); MIME-EcoEncode-0.95/META.yml0000664000076400007640000000076012241053107015005 0ustar murata0murata0--- abstract: 'MIME Encoding (Economical)' author: - 'MURATA Yasuhisa ' build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: MIME-EcoEncode no_index: directory: - t - inc requires: MIME::Base64: 3.01 version: 0.95 MIME-EcoEncode-0.95/META.json0000664000076400007640000000156412241053107015160 0ustar murata0murata0{ "abstract" : "MIME Encoding (Economical)", "author" : [ "MURATA Yasuhisa " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "MIME-EcoEncode", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "MIME::Base64" : "3.01" } } }, "release_status" : "stable", "version" : "0.95" } MIME-EcoEncode-0.95/README0000644000076400007640000000053212241050302014401 0ustar murata0murata0MIME-EcoEncode version 0.95 =========================== INSTALLATION To install this module type the following: perl Makefile.PL make make test make install COPYRIGHT AND LICENCE Copyright (C) 2011-2013 MURATA Yasuhisa This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. MIME-EcoEncode-0.95/lib/0000755000076400007640000000000012241053107014275 5ustar murata0murata0MIME-EcoEncode-0.95/lib/MIME/0000755000076400007640000000000012241053107015024 5ustar murata0murata0MIME-EcoEncode-0.95/lib/MIME/EcoEncode/0000755000076400007640000000000012241053107016650 5ustar murata0murata0MIME-EcoEncode-0.95/lib/MIME/EcoEncode/Fold.pm0000644000076400007640000001152412240564314020103 0ustar murata0murata0# Copyright (C) 2013 MURATA Yasuhisa # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package MIME::EcoEncode::Fold; use 5.008005; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw($VERSION); our @EXPORT = qw(mime_eco_fold); our $VERSION = '0.95'; our $LF; # line feed our $BPL; # bytes per line our $UTF8; our $REG_W; our $SPL; sub mime_eco_fold { my $str = shift; return '' unless defined $str; return '' if $str eq ''; my $charset = shift || 'UTF-8'; my $cs; if ($charset =~ /^([-0-9A-Za-z_]+)$/i) { $cs = lc($1); } else { # invalid option return undef; } our $LF = shift || "\n "; # line feed our $BPL = shift || 990; # bytes per line our $UTF8 = 1; our $REG_W = qr/(.)/; $LF =~ /([^\x0d\x0a]*)$/; our $SPL = length($1); my $jp = 0; if ($cs ne 'utf-8') { $UTF8 = 0; if ($cs eq 'iso-2022-jp') { $jp = 1; } elsif ($cs eq 'shift_jis') { # range of 2nd byte : [\x40-\x7e\x80-\xfc] $REG_W = qr/([\x81-\x9f\xe0-\xfc]?.)/; } elsif ($cs eq 'gb2312') { # Simplified Chinese # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'euc-kr') { # Korean # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'big5') { # Traditional Chinese # range of 2nd byte : [\x40-\x7e\xa1-\xfe] $REG_W = qr/([\x81-\xfe]?.)/; } else { # Single Byte (Latin, Cyrillic, ...) ; } } my $result = ''; my $refsub = $jp ? \&line_fold_jp : \&line_fold; my $odd = 0; for my $line (split /(\x0d?\x0a|\x0d)/, $str) { if ($odd) { $result .= $line; $odd = 0; } else { $result .= &$refsub($line); $odd = 1; } } return $result; } sub line_fold { my $str = shift; return '' if $str eq ''; my $str_len = length($str); our $BPL; return $str if $str_len <= $BPL; our $LF; our $UTF8; our $REG_W; our $SPL; my $w = ''; my $w_len; my $w_bak = ''; my $result = ''; my $max_len = $BPL; my ($chunk, $chunk_len) = ('', 0); my $str_pos = 0; utf8::decode($str) if $UTF8; # UTF8 flag on while ($str =~ /$REG_W/g) { $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_len = length($w); # size of one character if ($chunk_len + $w_len > $max_len) { $result .= $chunk . "$LF"; $str_pos += $chunk_len; $max_len = $BPL - $w_len - $SPL; if ($str_len - $str_pos <= $max_len) { utf8::encode($str) if $UTF8; # UTF8 flag off $chunk = substr($str, $str_pos); last; } $chunk = $w; $chunk_len = $w_len; } else { $chunk .= $w; $chunk_len += $w_len; } } return $result . $chunk; } sub line_fold_jp { my $str = shift; return '' if $str eq ''; our $BPL; return $str if length($str) <= $BPL; our $LF; our $SPL; my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9 my $k_in_bak = -1; my $k_out; my $ec; my $w1; my $w1_bak = ''; my $w = ''; my $w_len; my $w_bak = ''; my $result = ''; my $max_len = $BPL; while ($str =~ /(\e(..)|.)/g) { ($w1, $ec) = ($1, $2); $w .= $w1; if (defined $ec) { $w1_bak = $w1; if ($ec eq '(B') { $k_in = 0; } elsif ($ec eq '$B') { $k_in = 1; } else { $k_in = 9; } next; } else { if ($k_in == 1) { $k_in = 2; next; } elsif ($k_in == 2) { $k_in = 1; } } $k_out = $k_in ? 3 : 0; # 3 is "\e\(B" if (pos($str) + $k_out > $max_len) { $w_len = length($w); if ($k_in_bak) { $result .= $w_bak . substr($str, 0, pos($str) - $w_len, "") . "\e\(B$LF"; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; } } else { $w = $w1; } } else { $result .= $w_bak . substr($str, 0, pos($str) - $w_len, "") . "$LF"; } substr($str, 0, $w_len, ""); $max_len = $BPL - length($w) - $SPL; if (length($str) <= $max_len) { return $result . $w . $str; } $w_bak = $w; } $k_in_bak = $k_in; $w = ''; } return $result . $w_bak . $str; # impossible } 1; MIME-EcoEncode-0.95/lib/MIME/EcoEncode/Param.pod0000644000076400007640000000603512240616572020432 0ustar murata0murata0=head1 NAME MIME::EcoEncode::Param - RFC 2231 Encode/Decode =head1 SYNOPSIS use MIME::EcoEncode::Param; $encoded = mime_eco_param($str, 'UTF-8'); # encode utf8 string $encoded = mime_eco_param($str, "UTF-8'XX'"); # XX is language $encoded = mime_eco_param($str, 'UTF-8*XX?B'); # "B" encoding $encoded = mime_eco_param($str, 'UTF-8*XX?Q'); # "Q" encoding $encoded = mime_eco_param($str, 'GB2312'); # euc-cn string $encoded = mime_eco_param($str, 'EUC-KR'); # euc-kr string $encoded = mime_eco_param($str, 'Big5'); # big5 string $encoded = mime_eco_param($str, 'Shift_JIS'); # cp932 string $encoded = mime_eco_param($str, 'ISO-2022-JP'); # 7bit-jis string $encoded = mime_eco_param($str, $sbcs); # $sbcs : # single-byte charset # (e.g. 'ISO-8859-1') $decoded = mime_deco_param($encoded); # decode encoded string ($decoded, $param, $charset, $lang, $value) # return array = mime_deco_param($encoded); =head1 DESCRIPTION This module implements RFC 2231 Mime Parameter Value Encoding. =head2 Options $encoded = mime_eco_param($str, $charset, $lf, $bpl); # $charset : 'UTF-8' / "UTF-8'XX'" / # 'UTF-8*XX?B' / 'UTF-8*XX?Q' / # 'GB2312' / 'EUC-KR' / 'Big5' / # 'Shift_JIS' / 'ISO-2022-JP' / ... # (default: 'UTF-8') # Note: The others are all encoded as # single-byte string. # $lf : line feed (default: "\n") # $bpl : bytes per line (default: 76) $decoded = mime_deco_param($encoded, $bq_on); # $bq_on : 1 : ON decode "B/Q" encoding # 0 : OFF # (default: 1) =head2 Examples Ex1 - RFC 2231 encoding use MIME::EcoEncode::Param; my $str = " filename=\xe5\xaf\x8c\xe5\xa3\xab\xe5\xb1\xb1_2013.jpeg"; print mime_eco_param($str, "UTF-8'ja'", "\n", 33), "\n"; Ex1's output: filename*0*=UTF-8'ja'%E5%AF%8C; filename*1*=%E5%A3%AB%E5%B1%B1_; filename*2=2013.jpeg Ex2 - "Q" encoding (for Windows) use MIME::EcoEncode::Param; my $str = " name=\xe5\xaf\x8c\xe5\xa3\xab\xe5\xb1\xb1_2013.jpeg"; print mime_eco_param($str, 'UTF-8?Q', "\n", 33), "\n"; Ex2's output: name="=?UTF-8?Q?=E5=AF=8C=E5?= =?UTF-8?Q?=A3=AB=E5=B1=B1=5F20?= =?UTF-8?Q?13.jpeg?=" Ex3 - "B" encoding (for Windows) use MIME::EcoEncode::Param; my $str = " name=\xe5\xaf\x8c\xe5\xa3\xab\xe5\xb1\xb1_2013.jpeg"; print mime_eco_param($str, 'UTF-8?B', "\n", 33), "\n"; Ex3's output: name="=?UTF-8?B?5a+M5aOr5bGx?= =?UTF-8?B?XzIwMTMuanBlZw==?=" =head1 SEE ALSO L =head1 AUTHOR MURATA Yasuhisa Emurata@nips.ac.jpE =head1 COPYRIGHT Copyright (C) 2013 MURATA Yasuhisa =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut MIME-EcoEncode-0.95/lib/MIME/EcoEncode/Param.pm0000644000076400007640000004072512241034676020270 0ustar murata0murata0# Copyright (C) 2013 MURATA Yasuhisa # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package MIME::EcoEncode::Param; use 5.008005; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw($VERSION); our @EXPORT = qw(mime_eco_param mime_deco_param); our $VERSION = '0.95'; our $HEAD; # head string our $HTL; # head + tail length our $LF; # line feed our $BPL; # bytes per line our $UTF8; our $REG_W; sub mime_eco_param { my $str = shift; return '' unless defined $str; return '' if $str eq ''; my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/); $str =~ tr/\n\r//d; if ($str =~ /^\s*$/) { return $trailing_crlf ? $str . $trailing_crlf : $str; } my $charset = shift || 'UTF-8'; our $HEAD; # head string my $cs; my $type; # 0: RFC 2231, 1: "Q", 2: "B" if ($charset =~ /^([-0-9A-Za-z_]+)(\'[^\']*\')?$/i) { $cs = lc($1); $type = 0; $HEAD = $2 ? $charset : $charset . "''"; } elsif ($charset =~ /^([-0-9A-Za-z_]+)(\*[^\?]*)?(\?[QB])?$/i) { $cs = lc($1); if (defined $3) { $type = (lc($3) eq '?q') ? 1 : 2; $HEAD = '=?' . $charset . '?'; } else { $type = 2; $HEAD = '=?' . $charset . '?B?'; } } else { # invalid option return undef; } our $HTL; # head + tail length our $LF = shift || "\n"; # line feed our $BPL = shift || 76; # bytes per line our $UTF8 = 1; our $REG_W = qr/(.)/; my $jp = 0; my $np; $HTL = length($HEAD) + 2; if ($cs ne 'utf-8') { $UTF8 = 0; if ($cs eq 'iso-2022-jp') { $jp = 1; } elsif ($cs eq 'shift_jis') { # range of 2nd byte : [\x40-\x7e\x80-\xfc] $REG_W = qr/([\x81-\x9f\xe0-\xfc]?.)/; } elsif ($cs eq 'gb2312') { # Simplified Chinese # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'euc-kr') { # Korean # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'big5') { # Traditional Chinese # range of 2nd byte : [\x40-\x7e\xa1-\xfe] $REG_W = qr/([\x81-\xfe]?.)/; } else { # Single Byte (Latin, Cyrillic, ...) ; } } $str =~ s/^(\s*)//; # leading whitespace my $sps = $1; my ($param, $value) = split('=', $str, 2); unless (defined $value) { return $trailing_crlf ? $str . $trailing_crlf : $str; } my $quote = 0; if ($value =~ s/^\s*"(.*)"$/$1/) { $quote = 1; } if ($value eq '') { return $trailing_crlf ? $str . $trailing_crlf : $str; } my $result = "$sps$param="; my $v_len = length($value); my $ll_len = length($result); if (!$quote && $value !~ /[^\w!#\$&\+-\.\^`\{\|}~]/) { # regular token if ($type or $ll_len + $v_len <= $BPL) { $result .= $value; return $trailing_crlf ? $result . $trailing_crlf : $result; } my $n = 0; my $c; my $p_str; $result = "$sps$param\*0="; $ll_len += 2; while ($value =~ /(.)/g) { $c = $1; if ($ll_len + 1 > $BPL) { $n++; $p_str = " $param\*$n="; $result .= "$LF$p_str$c"; $ll_len = 1 + length($p_str); } else { $result .= $c; $ll_len++; } } return $trailing_crlf ? $result . $trailing_crlf : $result; } if ($quote && $value !~ /[^\t\x20-\x7e]/) { # regular quoted-string if ($type or $ll_len + $v_len + 2 <= $BPL) { $result .= "\"$value\""; return $trailing_crlf ? $result . $trailing_crlf : $result; } my $n = 0; my $vc; my $vc_len; my $p_str; $result = "$sps$param\*0=\""; $ll_len += 3; while ($value =~ /(\\.|.)/g) { $vc = $1; $vc_len = length($vc); if ($ll_len + $vc_len + 1 > $BPL) { $n++; $p_str = " $param\*$n=\""; $result .= "\"$LF$p_str$vc"; $ll_len = $vc_len + length($p_str); } else { $result .= $vc; $ll_len += $vc_len; } } $result .= '"'; return $trailing_crlf ? $result . $trailing_crlf : $result; } # # extended parameter (contain regular parameter) # if ($jp) { if ($type == 0) { return param_enc_jp($param, $value, $sps, $trailing_crlf, $quote); } if ($type == 1) { # "Q" encoding require MIME::EcoEncode::JP_Q; $MIME::EcoEncode::JP_Q::HEAD = $HEAD; $MIME::EcoEncode::JP_Q::HTL = $HTL; $MIME::EcoEncode::JP_Q::LF = $LF; $MIME::EcoEncode::JP_Q::BPL = $BPL; $MIME::EcoEncode::JP_Q::MODE = 0; my $enc = MIME::EcoEncode::JP_Q::add_ew_jp_q($value, length($result) + 1, \$np, 1, 1); if ($enc eq ' ') { $enc = MIME::EcoEncode::JP_Q::add_ew_jp_q($value, 2, \$np, 1); $result .= "$LF \"$enc\""; } else { $result .= "\"$enc\""; } return $trailing_crlf ? $result . $trailing_crlf : $result; } else { # "B" encoding require MIME::EcoEncode::JP_B; $MIME::EcoEncode::JP_B::HEAD = $HEAD; $MIME::EcoEncode::JP_B::HTL = $HTL; $MIME::EcoEncode::JP_B::LF = $LF; $MIME::EcoEncode::JP_B::BPL = $BPL; my $enc = MIME::EcoEncode::JP_B::add_ew_jp_b($value, length($result) + 1, \$np, 1, 1); if ($enc eq ' ') { $enc = MIME::EcoEncode::JP_B::add_ew_jp_b($value, 2, \$np, 1); $result .= "$LF \"$enc\""; } else { $result .= "\"$enc\""; } return $trailing_crlf ? $result . $trailing_crlf : $result; } } if ($type == 0) { return param_enc($param, $value, $sps, $trailing_crlf, $quote); } if ($type == 1) { # "Q" encoding require MIME::EcoEncode; $MIME::EcoEncode::HEAD = $HEAD; $MIME::EcoEncode::HTL = $HTL; $MIME::EcoEncode::LF = $LF; $MIME::EcoEncode::BPL = $BPL; $MIME::EcoEncode::REG_W = $REG_W; my $enc = MIME::EcoEncode::add_ew_q($value, length($result) + 1, \$np, 1, 1); if ($enc eq ' ') { $enc = MIME::EcoEncode::add_ew_q($value, 2, \$np, 1); $result .= "$LF \"$enc\""; } else { $result .= "\"$enc\""; } return $trailing_crlf ? $result . $trailing_crlf : $result; } else { # "B" encoding require MIME::EcoEncode; $MIME::EcoEncode::HEAD = $HEAD; $MIME::EcoEncode::HTL = $HTL; $MIME::EcoEncode::LF = $LF; $MIME::EcoEncode::BPL = $BPL; $MIME::EcoEncode::REG_W = $REG_W; my $enc = MIME::EcoEncode::add_ew_b($value, length($result) + 1, \$np, 1, 1); if ($enc eq ' ') { $enc = MIME::EcoEncode::add_ew_b($value, 2, \$np, 1); $result .= "$LF \"$enc\""; } else { $result .= "\"$enc\""; } return $trailing_crlf ? $result . $trailing_crlf : $result; } } sub param_enc { my $param = shift; my $value = shift; my $sps = shift; my $trailing_crlf = shift; my $quote = shift; my $result; my $ll_len; our $UTF8; our $REG_W; our $HEAD; $value = "\"$value\"" if $quote; my $vstr = $value; $value =~ s/([^\w!#\$&\+-\.\^`\{\|}~])/ sprintf("%%%X",ord($1))/egox; $result = "$sps$param\*=$HEAD"; if (length($result) + length($value) <= $BPL) { $result .= $value; return $trailing_crlf ? $result . $trailing_crlf : $result; } my $n = 0; my $nn = 1; my $w1; my $p_str; my $w; my $w_len; my $chunk = ''; my $ascii = 1; $result = "$sps$param\*0\*=$HEAD"; $ll_len = length($result); utf8::decode($vstr) if $UTF8; # UTF8 flag on while ($vstr =~ /$REG_W/g) { $w1 = $1; utf8::encode($w1) if $UTF8; # UTF8 flag off $w_len = length($w1); # size of one character $value =~ /((?:%..|.){$w_len})/g; $w = $1; $w_len = length($w); $ascii = 0 if $w_len > 1; # 1 is ';' if ($ll_len + $w_len + 1 > $BPL) { $p_str = " $param\*$nn\*="; if ($ascii) { if ($n == 0) { $result = "$sps$param\*0=$HEAD$chunk$w;"; } else { $result .= "$LF $param\*$n=$chunk$w;"; } $ll_len = length($p_str); $chunk = ''; } else { if ($n == 0) { $result = "$result$chunk;"; } else { $result .= "$LF $param\*$n\*=$chunk;"; } $ll_len = length($p_str) + $w_len; $chunk = $w; } $ascii = 1 if $w_len == 1; $n = $nn; $nn++; } else { $chunk .= $w; $ll_len += $w_len; } } if ($ascii) { if ($chunk eq '') { chop($result); } else { $result .= "$LF $param\*$n=$chunk"; } } else { $result .= "$LF $param\*$n\*=$chunk"; } return $trailing_crlf ? $result . $trailing_crlf : $result; } sub param_enc_jp { my $param = shift; my $value = shift; my $sps = shift; my $trailing_crlf = shift; my $quote = shift; my $result; my $ll_len; our $HEAD; $value = "\"$value\"" if $quote; my $vstr = $value; $value =~ s/([^\w!#\$&\+-\.\^`\{\|}~])/ sprintf("%%%X",ord($1))/egox; $result = "$sps$param\*=$HEAD"; if (length($result) + length($value) <= $BPL) { $result .= $value; return $trailing_crlf ? $result . $trailing_crlf : $result; } my $n = 0; my $nn = 1; my $p_str; my $ascii = 1; my $ee_str = '%1B%28B'; my $ee_len = 7; my $vstr_len = length($vstr); my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9 my $k_in_bak = 0; my $ec; my ($w, $w_len) = ('', 0); my ($chunk, $chunk_len) = ('', 0); my ($w1, $w1_bak); my $enc_len; $vstr =~ s/\e\(B$//; $result = "$sps$param\*0\*=$HEAD"; $ll_len = length($result); while ($vstr =~ /\e(..)|./g) { $ec = $1; $value =~ /(%1B(?:%..|.)(?:%..|.)|(?:%..|.))/g; $w1 = $1; $w .= $w1; if (defined $ec) { $w1_bak = $w1; if ($ec eq '(B') { $k_in = 0; } elsif ($ec eq '$B') { $k_in = 1; } else { $k_in = 9; } next; } else { if ($k_in == 1) { $k_in = 2; next; } elsif ($k_in == 2) { $k_in = 1; } } $w_len = length($w); $enc_len = $w_len + ($k_in ? $ee_len : 0); $ascii = 0 if $w_len > 1; # 1 is ';' if ($ll_len + $enc_len + 1 > $BPL) { $p_str = " $param\*$nn\*="; if ($ascii) { if ($n == 0) { $result = "$sps$param\*0=$HEAD$chunk$w;"; } else { $result .= "$LF $param\*$n=$chunk$w;"; } $ll_len = length($p_str); $chunk = ''; } else { if ($k_in_bak) { $chunk .= $ee_str; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; $w_len += length($w1_bak); } } else { $w = $w1; $w_len = length($w1); } } if ($n == 0) { $result = "$result$chunk;"; } else { $result .= "$LF $param\*$n\*=$chunk;"; } $ll_len = length($p_str) + $w_len; $chunk = $w; } $ascii = 1 if $w_len == 1; $n = $nn; $nn++; } else { $chunk .= $w; $ll_len += $w_len; } $k_in_bak = $k_in; $w = ''; $w_len = 0; } if ($ascii) { if ($chunk eq '') { chop($result); } else { $result .= "$LF $param\*$n=$chunk"; } } else { $chunk .= $ee_str if $k_in_bak; $result .= "$LF $param\*$n\*=$chunk"; } return $trailing_crlf ? $result . $trailing_crlf : $result; } sub mime_deco_param { my $str = shift; if ((!defined $str) || $str eq '') { return ('') x 5 if wantarray; return ''; } my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/); $str =~ tr/\n\r//d; if ($str =~ /^\s*$/) { return ($trailing_crlf ? $str . $trailing_crlf : $str, ('') x 4) if wantarray; return $trailing_crlf ? $str . $trailing_crlf : $str; } $str =~ s/^(\s*)//; # leading whitespace my $sps = $1; my $result = ''; my ($param, $value, $charset, $lang); my ($param0, $value0, $charset0, $lang0) = ('') x 4; my $bq_on = shift; # "B/Q" decode ON/OFF $bq_on = 1 unless defined $bq_on; if ($bq_on) { $str =~ /([^=]*)=\s*"(.*?[^\\])"\s*/; ($param, $value) = ($1, $2); my $reg_ew = qr{^ =\? ([-0-9A-Za-z_]+) # charset (?:\*([A-Za-z]{1,8} # language (?:-[A-Za-z]{1,8})*))? # (RFC 2231 section 5) \? (?: [Bb]\?([0-9A-Za-z\+\/]+={0,2})\?= # "B" encoding | [Qq]\?([\x21-\x3e\x40-\x7e]+)\?= # "Q" encoding )}x; if ($value and $value =~ qr/$reg_ew(\s|$)/) { # "B" or "Q" ($charset0, $lang0) = ($1, $2); $lang0 = '' unless defined $lang0; $param0 = $param; require MIME::Base64; MIME::Base64->import(); require MIME::QuotedPrint; MIME::QuotedPrint->import(); my ($b_enc, $q_enc); for my $w (split /\s+/, $value) { if ($w =~ qr/$reg_ew$/o) { ($charset, $lang, $b_enc, $q_enc) = ($1, $2, $3, $4); if (defined $q_enc) { $q_enc =~ tr/_/ /; $value0 .= decode_qp($q_enc); } else { $value0 .= decode_base64($b_enc); } } } if (lc($charset0) eq 'iso-2022-jp') { # remove redundant ESC sequences $value0 =~ s/(\e..)([^\e]+)\e\(B(?=\1)/$1$2\n/g; $value0 =~ s/\n\e..//g; $value0 =~ s/\e\(B(\e..)/$1/g; } $result = "$sps$param0=\"$value0\""; if (wantarray) { return ($trailing_crlf ? $result . $trailing_crlf : $result, $param0, $charset0, $lang0, $value0); } return $trailing_crlf ? $result . $trailing_crlf : $result; } } my ($param0_init, $cs_init, $quote) = (0) x 3; my %params; while ($str =~ /([^=]*)=(\s*".*?[^\\]";?|\S*)\s*/g) { ($param, $value) = ($1, $2); $value =~ s/;$//; if ($value =~ s/^\s*"(.*)"$/$1/) { $quote = 1; } if ($param =~ s/\*$//) { if (!$cs_init) { if ($value =~ /^(.*?)'(.*?)'(.*)/) { ($charset0, $lang0, $value) = ($1, $2, $3); } $cs_init = 1; } $value =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg; } if (!$param0_init) { $param =~ s/\*0$//; $param0 = $param; $param0_init = 1; } $params{$param} = $value; } my $n = keys %params; $result = ($n == 0) ? "$sps$str" : "$sps$param0="; $value0 = $params{$param0}; $value0 = '' unless defined $value0; if ($n > 1) { for (my $i = 1; $i < $n; $i++) { $value = $params{$param0 . "\*$i"}; $value0 .= $value if defined $value; } } if (lc($charset0) eq 'iso-2022-jp') { # remove redundant ESC sequences $value0 =~ s/(\e..)([^\e]+)\e\(B(?=\1)/$1$2\n/g; $value0 =~ s/\n\e..//g; $value0 =~ s/\e\(B(\e..)/$1/g; } $result .= ($quote ? "\"$value0\"" : $value0); if (wantarray) { if (!$cs_init and $quote) { $value0 =~ s/\\(.)/$1/g; } return ($trailing_crlf ? $result . $trailing_crlf : $result, $param0, $charset0, $lang0, $value0); } return $trailing_crlf ? $result . $trailing_crlf : $result; } 1; MIME-EcoEncode-0.95/lib/MIME/EcoEncode/JP_Q.pm0000644000076400007640000001124412236100605020001 0ustar murata0murata0# Copyright (C) 2011-2013 MURATA Yasuhisa # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package MIME::EcoEncode::JP_Q; use strict; use warnings; our $VERSION = '0.95'; use MIME::QuotedPrint; use constant TAIL => '?='; our $HEAD; # head string our $HTL; # head + tail length our $LF; # line feed our $BPL; # bytes per line our $MODE; # unstructured : 0, structured : 1 # add encoded-word for "Q" encoding and 7bit-jis string # parameters: # sp : start position (indentation of the first line) # ep : end position of last line (call by reference) # rll : room of last line (default: 0) # fof : flag to check size-over at the first time sub add_ew_jp_q { my ($str, $sp, $ep, $rll, $fof) = @_; return '' if $str eq ''; # '.' is added to invalidate RFC 2045 6.7.(3) my $qstr = encode_qp($str . '.', ''); my $ee_len; # structured: 7, unstructured: 5 my $ee_str; chop($qstr); # cut '.' $qstr =~ s/_/=5F/g; $qstr =~ tr/ /_/; $qstr =~ s/\t/=09/g; if ($MODE) { # structured $ee_len = 7; # '=1B=28B' $ee_str = '=1B=28B'; $qstr =~ s/([^\w\!\*\+\-\/\=])/sprintf("=%X",ord($1))/ego; } else { # unstructured $ee_len = 5; # '=1B(B' $ee_str = '=1B(B'; $qstr =~ s/\?/=3F/g; } my $qstr_len = length($qstr); my $ep_v = $qstr_len + $HTL + $sp; if ($ep_v + $rll <= $BPL) { $$ep = $ep_v; return $HEAD . $qstr . TAIL; } my $ll_flag = ($ep_v <= $BPL) ? 1 : 0; my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9 my $k_in_bak = -1; my $k_out; my $ec; my $w1; my $w1_bak = ''; my $w = ''; my $w_len; my $w_bak = ''; my $chunk; my $chunk_len; my $result = ''; my $ep_str; my $max_len = $BPL - $HTL - $sp; my $max_len2 = $BPL - $HTL - 1; my $max_len3 = $BPL - $HTL - 1 - $rll; while ($str =~ /\e(..)|./g) { $ec = $1; $qstr =~ /(\=1B(?:\=..|.)(?:\=..|.)|(?:\=..|.))/g; $w1 = $1; $w .= $w1; if (defined $ec) { $w1_bak = $w1; if ($ec eq '(B') { $k_in = 0; } elsif ($ec eq '$B') { $k_in = 1; } else { $k_in = 9; } next; } else { if ($k_in == 1) { $k_in = 2; next; } elsif ($k_in == 2) { $k_in = 1; } } $k_out = $k_in ? $ee_len : 0; if (pos($qstr) + $k_out > $max_len) { $w_len = length($w); if ($k_in_bak < 0) { # size over at the first time $result = ' '; return $result if $fof; } else { if ($k_in_bak) { $chunk = $w_bak . substr($qstr, 0, pos($qstr) - $w_len, "") . $ee_str; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; } } else { $w = $w1; } } else { $chunk = $w_bak . substr($qstr, 0, pos($qstr) - $w_len, ""); } $result .= $HEAD . $chunk . TAIL . "$LF "; } substr($qstr, 0, $w_len, ""); $chunk_len = length($qstr) + length($w); if ($chunk_len <= $max_len3) { $chunk = $w . $qstr; last; } $ll_flag = 1 if $chunk_len <= $max_len2; $w_bak = $w; $max_len = $max_len2 - length($w_bak); } else { if ($ll_flag and pos($qstr) + $k_out == length($qstr)) { # last char if ($k_in_bak < 0) { # size over at the first time $result = ' '; return $result if $fof; } else { if ($k_in_bak) { $chunk = $w_bak . substr($qstr, 0, pos($qstr) - length($w), "") . $ee_str; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; } } else { $w = $w1; } } else { $chunk = $w_bak . substr($qstr, 0, pos($qstr) - length($w), ""); } $result .= $HEAD . $chunk . TAIL . "$LF "; } $chunk = $k_out ? $w . $ee_str : $w; last; } } $k_in_bak = $k_in; $w = ''; } $ep_str = $HEAD . $chunk . TAIL; $$ep = length($ep_str) + 1; return $result . $ep_str; } 1; MIME-EcoEncode-0.95/lib/MIME/EcoEncode/JP_B.pm0000644000076400007640000001102512235044615017766 0ustar murata0murata0# Copyright (C) 2011-2013 MURATA Yasuhisa # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package MIME::EcoEncode::JP_B; use strict; use warnings; our $VERSION = '0.95'; use MIME::Base64; use constant TAIL => '?='; our $HEAD; # head string our $HTL; # head + tail length our $LF; # line feed our $BPL; # bytes per line # add encoded-word for "B" encoding and 7bit-jis string # parameters: # sp : start position (indentation of the first line) # ep : end position of last line (call by reference) # rll : room of last line (default: 0) # fof : flag to check size-over at the first time sub add_ew_jp_b { my ($str, $sp, $ep, $rll, $fof) = @_; return '' if $str eq ''; # encoded size + sp my $ep_v = int((length($str) + 2) / 3) * 4 + $HTL + $sp; if ($ep_v + $rll <= $BPL) { $$ep = $ep_v; return $HEAD . encode_base64($str, '') . TAIL; } my $ll_flag = ($ep_v <= $BPL) ? 1 : 0; my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9 my $k_in_bak = -1; my $k_out; my $ec; my $w1; my $w1_bak = ''; my $w = ''; my $w_len; my $w_bak = ''; my $chunk; my $chunk_len; my $result = ''; my $ep_str; my $max_len = int(($BPL - $HTL - $sp) / 4) * 3; my $max_len2 = int(($BPL - $HTL - 1) / 4) * 3; my $max_len3 = int(($BPL - $HTL - 1 - $rll) / 4) * 3; while ($str =~ /(\e(..)|.)/g) { ($w1, $ec) = ($1, $2); $w .= $w1; if (defined $ec) { $w1_bak = $w1; if ($ec eq '(B') { $k_in = 0; } elsif ($ec eq '$B') { $k_in = 1; } else { $k_in = 9; } next; } else { if ($k_in == 1) { $k_in = 2; next; } elsif ($k_in == 2) { $k_in = 1; } } $k_out = $k_in ? 3 : 0; # 3 is "\e\(B" if (pos($str) + $k_out > $max_len) { $w_len = length($w); if ($k_in_bak < 0) { # size over at the first time $result = ' '; return $result if $fof; } else { if ($k_in_bak) { $chunk = $w_bak . substr($str, 0, pos($str) - $w_len, "") . "\e\(B"; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; } } else { $w = $w1; } } else { $chunk = $w_bak . substr($str, 0, pos($str) - $w_len, ""); } $result .= $HEAD . encode_base64($chunk, '') . TAIL . "$LF "; } substr($str, 0, $w_len, ""); $chunk_len = length($str) + length($w); if ($chunk_len <= $max_len3) { $chunk = $w . $str; last; } $ll_flag = 1 if $chunk_len <= $max_len2; $w_bak = $w; $max_len = $max_len2 - length($w_bak); } else { if ($ll_flag and pos($str) + $k_out == length($str)) { # last char if ($k_in_bak < 0) { # size over at the first time $result = ' '; return $result if $fof; } else { if ($k_in_bak) { $chunk = $w_bak . substr($str, 0, pos($str) - length($w), "") . "\e\(B"; if ($k_in) { if ($k_in_bak == $k_in) { $w = $w1_bak . $w; } } else { $w = $w1; } } else { $chunk = $w_bak . substr($str, 0, pos($str) - length($w), ""); } $result .= $HEAD . encode_base64($chunk, '') . TAIL . "$LF "; } $chunk = $k_out ? $w . "\e\(B" : $w; last; } } $k_in_bak = $k_in; $w = ''; } $ep_str = $HEAD . encode_base64($chunk, '') . TAIL; $$ep = length($ep_str) + 1; return $result . $ep_str; } 1; MIME-EcoEncode-0.95/lib/MIME/EcoEncode/Fold.pod0000644000076400007640000000430412240617037020250 0ustar murata0murata0=head1 NAME MIME::EcoEncode::Fold - folding multi-byte string =head1 SYNOPSIS use MIME::EcoEncode::Fold; $folded = mime_eco_fold($str, 'UTF-8'); # fold utf8 string $folded = mime_eco_fold($str, 'GB2312'); # fold euc-cn string $folded = mime_eco_fold($str, 'EUC-KR'); # fold euc-kr string $folded = mime_eco_fold($str, 'Big5'); # fold big5 string $folded = mime_eco_fold($str, 'Shift_JIS'); # fold cp932 string $folded = mime_eco_fold($str, 'ISO-2022-JP'); # fold 7bit-jis string $folded = mime_eco_fold($str, $sbcs); # $sbcs : # single-byte charset # (e.g. 'ISO-8859-1') =head1 DESCRIPTION This is a module for folding multi-byte string. When the line of the e-mail text is long, SMTP server may insert line feed code and the multi-byte string might break. This module was written in order to prevent it. =head2 Options $folded = mime_eco_fold($str, $charset, $lf, $bpl); # $charset : 'UTF-8' / 'GB2312' / 'EUC-KR' / 'Big5' / # 'Shift_JIS' / 'ISO-2022-JP' / ... # (default: 'UTF-8') # Note: The others are all folded as # single-byte string. # $lf : line feed (default: "\n ") # $bpl : bytes per line (default: 990) # Note: 990 is postfix's default. =head2 Examples Ex1 use MIME::EcoEncode::Fold; my $str =<<"END"; This document specifies an Internet standards track protocol for the Internet community, and requests discussion and suggestions for improvements. END print mime_eco_fold($str, 'UTF-8', undef, 50); Ex1's output: This document specifies an Internet standards trac k protocol for the Internet community, and requests discussion and su ggestions for improvements. =head1 SEE ALSO L =head1 AUTHOR MURATA Yasuhisa Emurata@nips.ac.jpE =head1 COPYRIGHT Copyright (C) 2013 MURATA Yasuhisa =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut MIME-EcoEncode-0.95/lib/MIME/EcoEncode.pod0000644000076400007640000001034112240542376017365 0ustar murata0murata0=head1 NAME MIME::EcoEncode - MIME Encoding (Economical) =head1 SYNOPSIS use MIME::EcoEncode; $encoded = mime_eco($str, 'UTF-8'); # encode utf8 string $encoded = mime_eco($str, 'UTF-8?B'); # ditto ("B" encoding) $encoded = mime_eco($str, 'UTF-8?Q'); # ditto ("Q" encoding) $encoded = mime_eco($str, 'UTF-8*XX'); # XX is RFC2231's language $encoded = mime_eco($str, 'UTF-8*XX?B'); # ditto ("B" encoding) $encoded = mime_eco($str, 'UTF-8*XX?Q'); # ditto ("Q" encoding) $encoded = mime_eco($str, 'GB2312'); # encode euc-cn string $encoded = mime_eco($str, 'EUC-KR'); # encode euc-kr string $encoded = mime_eco($str, 'Big5'); # encode big5 string $encoded = mime_eco($str, 'Shift_JIS'); # encode cp932 string $encoded = mime_eco($str, 'ISO-2022-JP'); # encode 7bit-jis string $encoded = mime_eco($str, $sbcs); # $sbcs : # single-byte charset # (e.g. 'ISO-8859-1') $decoded = mime_deco($encoded); # decode encoded string # (for single charset) ($decoded, $charset, $language) # return array = mime_deco($encoded); # (for single charset) use Encode; $decoded = mime_deco($encoded, \&cb); # cb is callback subroutine # (for multiple charsets) # Example of callback subroutine sub cb { my ($encoded_word, $charset, $language, $decoded_word) = @_; encode_utf8(decode($charset, $decoded_word)); } =head1 DESCRIPTION This module implements RFC 2047 Mime Header Encoding. =head2 Options $encoded = mime_eco($str, $charset, $lf, $bpl, $mode, $lss); # $charset : 'UTF-8' / 'UTF-8?Q' / 'UTF-8*XX' / # 'GB2312' / 'EUC-KR' / 'Big5' / # 'Shift_JIS' / 'ISO-2022-JP' / ... # (default: 'UTF-8') # Note: "B" encoding is all defaults. # The others are all encoded as # single-byte string. # $lf : line feed (default: "\n") # $bpl : bytes per line (default: 76) # $mode : 0 : unstructured header (e.g. Subject) # 1 : structured header (e.g. To, Cc, From) # 2 : auto (Subject or Comments ? 0 : 1) # (default: 2) # $lss : length of security space (default: 25) =head2 Examples Ex1 - normal (structured header) use MIME::EcoEncode; my $str = "From: Sakura (\xe6\xa1\x9c)\n"; print mime_eco($str); Ex1's output: From: Sakura (=?UTF-8?B?5qGc?=) Ex2 - "Q" encoding + RFC2231's language use MIME::EcoEncode; my $str = "From: Sakura (\xe6\xa1\x9c)\n"; print mime_eco($str, 'UTF-8*ja-JP?Q'); Ex2's output: From: Sakura (=?UTF-8*ja-JP?Q?=E6=A1=9C?=) Ex3 - continuous spaces use MIME::EcoEncode; my $str = "From: Sakura (\xe6\xa1\x9c)\n"; print mime_eco($str); Ex3's output: From: Sakura (=?UTF-8?B?5qGc?=) Ex4 - unstructured header (1) use MIME::EcoEncode; my $str = "Subject: Sakura (\xe6\xa1\x9c)\n"; print mime_eco($str); Ex4's output: Subject: Sakura =?UTF-8?B?KOahnCk=?= Ex5 - unstructured header (2) use MIME::EcoEncode; my $str = "Subject: \xe6\xa1\x9c Sakura\n"; print mime_eco($str); Ex5's output: Subject: =?UTF-8?B?5qGc?= Sakura Ex6 - 7bit-jis string use Encode; use MIME::EcoEncode; my $str = "Subject: \xe6\xa1\x9c Sakura\n"; print mime_eco(encode('7bit-jis', decode_utf8($str)), 'ISO-2022-JP'); Ex6's output: Subject: =?ISO-2022-JP?B?GyRCOnkbKEI=?= Sakura =head1 SEE ALSO L, L For more information, please visit F =head1 AUTHOR MURATA Yasuhisa Emurata@nips.ac.jpE =head1 COPYRIGHT Copyright (C) 2011-2013 MURATA Yasuhisa =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut MIME-EcoEncode-0.95/lib/MIME/EcoEncode.pm0000644000076400007640000004201212240532154017210 0ustar murata0murata0# Copyright (C) 2011-2013 MURATA Yasuhisa # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package MIME::EcoEncode; use 5.008005; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw($VERSION); our @EXPORT = qw(mime_eco mime_deco); our $VERSION = '0.95'; use MIME::Base64; use MIME::QuotedPrint; use constant TAIL => '?='; our $LF; # line feed our $BPL; # bytes per line our $MODE; # unstructured : 0, structured : 1, auto : 2 our $HEAD; # head string our $HTL; # head + tail length our $UTF8; our $REG_W; our $ADD_EW; our $REG_RP; sub mime_eco { my $str = shift; return '' unless defined $str; return '' if $str eq ''; my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/); $str =~ tr/\n\r//d; if ($str =~ /^\s*$/) { return $trailing_crlf ? $str . $trailing_crlf : $str; } my $charset = shift || 'UTF-8'; # invalid option return undef unless $charset =~ /^([-0-9A-Za-z_]+)(?:\*[^\?]*)?(\?[QB])?$/i; my $cs = lc($1); $charset .= '?B' unless defined $2; our $LF = shift || "\n"; # line feed our $BPL = shift || 76; # bytes per line our $MODE = shift; $MODE = 2 unless defined $MODE; my $lss = shift; $lss = 25 unless defined $lss; our $HEAD; # head string our $HTL; # head + tail length our $UTF8 = 1; our $REG_W = qr/(.)/; our $ADD_EW; our $REG_RP; my $jp = 0; my $pos; my $np; my $refsub; my $reg_rp1; my ($w1, $w1_len, $w2); my ($sps, $sps_len); my $sp1 = ''; my $sp1_bak; my $result; my $ascii; my $tmp; my $count = 0; my $q_enc = ($charset =~ /Q$/i) ? 1 : 0; $HEAD = '=?' . $charset . '?'; $HTL = length($HEAD) + 2; if ($cs ne 'utf-8') { $UTF8 = 0; if ($cs eq 'iso-2022-jp') { $jp = 1; } elsif ($cs eq 'shift_jis') { # range of 2nd byte : [\x40-\x7e\x80-\xfc] $REG_W = qr/([\x81-\x9f\xe0-\xfc]?.)/; } elsif ($cs eq 'gb2312') { # Simplified Chinese # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'euc-kr') { # Korean # range of 2nd byte : [\xa1-\xfe] $REG_W = qr/([\xa1-\xfe]?.)/; } elsif ($cs eq 'big5') { # Traditional Chinese # range of 2nd byte : [\x40-\x7e\xa1-\xfe] $REG_W = qr/([\x81-\xfe]?.)/; } else { # Single Byte (Latin, Cyrillic, ...) ; } } $str =~ /(\s*)(\S+)/gc; ($sps, $w2) = ($1, $2); if ($w2 =~ /[^\x21-\x7e]/) { $ascii = 0; $sps_len = length($sps); if ($sps_len > $lss) { $result = substr($sps, 0, $lss); $w1 = substr($sps, $lss) . $w2; $pos = $lss; } else { $result = $sps; $w1 = $w2; $pos = $sps_len; } } else { $ascii = 1; $result = ''; $w1 = "$sps$w2"; $pos = 0; } if ($MODE == 2) { $MODE = ($w1 =~ /^(?:Subject:|Comments:)$/i) ? 0 : 1; } if ($jp) { if ($q_enc) { require MIME::EcoEncode::JP_Q; $MIME::EcoEncode::JP_Q::HEAD = $HEAD; $MIME::EcoEncode::JP_Q::HTL = $HTL; $MIME::EcoEncode::JP_Q::LF = $LF; $MIME::EcoEncode::JP_Q::BPL = $BPL; $MIME::EcoEncode::JP_Q::MODE = $MODE; if ($MODE == 0) { $refsub = \&MIME::EcoEncode::JP_Q::add_ew_jp_q; } else { $refsub = \&add_ew_sh; $reg_rp1 = qr/\e\(B[\x21-\x7e]*\)\,?$/; $REG_RP = qr/\e\(B[\x21-\x7e]*?(\){1,3}\,?)$/; $ADD_EW = \&MIME::EcoEncode::JP_Q::add_ew_jp_q; } } else { require MIME::EcoEncode::JP_B; $MIME::EcoEncode::JP_B::HEAD = $HEAD; $MIME::EcoEncode::JP_B::HTL = $HTL; $MIME::EcoEncode::JP_B::LF = $LF; $MIME::EcoEncode::JP_B::BPL = $BPL; if ($MODE == 0) { $refsub = \&MIME::EcoEncode::JP_B::add_ew_jp_b; } else { $refsub = \&add_ew_sh; $reg_rp1 = qr/\e\(B[\x21-\x7e]*\)\,?$/; $REG_RP = qr/\e\(B[\x21-\x7e]*?(\){1,3}\,?)$/; $ADD_EW = \&MIME::EcoEncode::JP_B::add_ew_jp_b; } } } else { if ($MODE == 0) { $refsub = $q_enc ? \&add_ew_q : \&add_ew_b; } else { $refsub = \&add_ew_sh; $reg_rp1 = qr/\)\,?$/; $REG_RP = qr/(\){1,3}\,?)$/; $ADD_EW = $q_enc ? \&add_ew_q : \&add_ew_b; } } while ($str =~ /(\s*)(\S+)/gc) { ($sps, $w2) = ($1, $2); if ($w2 =~ /[^\x21-\x7e]/) { $sps_len = length($sps); if ($ascii) { # "ASCII \s+ non-ASCII" $sp1_bak = $sp1; $sp1 = chop($sps); $w1 .= $sps if $sps_len > $lss; $w1_len = length($w1); if ($count == 0) { $result = $w1; $pos = $w1_len; } else { if (($count > 1) and ($pos + $w1_len + 1 > $BPL)) { $result .= "$LF$sp1_bak$w1"; $pos = $w1_len + 1; } else { $result .= "$sp1_bak$w1"; $pos += $w1_len + 1; } } if ($sps_len <= $lss) { if ($pos >= $BPL) { $result .= $LF . $sps; $pos = $sps_len - 1; } elsif ($pos + $sps_len - 1 > $BPL) { $result .= substr($sps, 0, $BPL - $pos) . $LF . substr($sps, $BPL - $pos); $pos += $sps_len - $BPL - 1; } else { $result .= $sps; $pos += $sps_len - 1; } } $w1 = $w2; } else { # "non-ASCII \s+ non-ASCII" if (($MODE == 1) and ($sps_len <= $lss)) { if ($w1 =~ /$reg_rp1/ or $w2 =~ /^\(/) { if ($count == 0) { $result .= &$refsub($w1, $pos, \$np, 0); } else { $tmp = &$refsub($w1, 1 + $pos, \$np, 0); $result .= ($tmp =~ s/^ /$sp1/) ? "$LF$tmp" : "$sp1$tmp"; } $pos = $np; $sp1 = chop($sps); if ($pos + $sps_len - 1 > $BPL) { $result .= substr($sps, 0, $BPL - $pos) . $LF . substr($sps, $BPL - $pos); $pos += $sps_len - $BPL - 1; } else { $result .= $sps; $pos += $sps_len - 1; } $w1 = $w2; } else { $w1 .= "$sps$w2"; } } else { $w1 .= "$sps$w2"; } } $ascii = 0; } else { # "ASCII \s+ ASCII" or "non-ASCII \s+ ASCII" $w1_len = length($w1); if ($ascii) { # "ASCII \s+ ASCII" if ($count == 0) { $result = $w1; $pos = $w1_len; } else { if (($count > 1) and ($pos + $w1_len + 1 > $BPL)) { $result .= "$LF$sp1$w1"; $pos = $w1_len + 1; } else { $result .= "$sp1$w1"; $pos += $w1_len + 1; } } } else { # "non-ASCII \s+ ASCII" if ($count == 0) { $result .= &$refsub($w1, $pos, \$np, 0); $pos = $np; } else { $tmp = &$refsub($w1, 1 + $pos, \$np, 0); $result .= ($tmp =~ s/^ /$sp1/) ? "$LF$tmp" : "$sp1$tmp"; $pos = $np; } } $sps_len = length($sps); if ($pos >= $BPL) { $sp1 = substr($sps, 0, 1); $w2 = substr($sps, 1) . $w2; } elsif ($pos + $sps_len - 1 > $BPL) { $result .= substr($sps, 0, $BPL - $pos); $sp1 = substr($sps, $BPL - $pos, 1); $w2 = substr($sps, $BPL - $pos + 1) . $w2; $pos = $BPL; } else { $sp1 = chop($sps); $result .= $sps; $pos += $sps_len - 1; } $w1 = $w2; $ascii = 1; } $count++ if $count <= 1; } ($sps) = ($str =~ /(.*)/g); # All space of the remainder if ($ascii) { $w1 .= $sps; if ($count == 0) { $result = $w1; } else { $w1_len = length($w1); if (($count > 1) and ($pos + $w1_len + 1 > $BPL)) { $result .= "$LF$sp1$w1"; } else { $result .= "$sp1$w1"; } } } else { $sps_len = length($sps); if ($count == 0) { if ($sps_len > $lss) { $w1 .= substr($sps, 0, $sps_len - $lss); $result .= &$refsub($w1, $pos, \$np, $lss) . substr($sps, $sps_len - $lss); } else { $result .= &$refsub($w1, $pos, \$np, $sps_len) . $sps; } } else { if ($sps_len > $lss) { $w1 .= substr($sps, 0, $sps_len - $lss); $tmp = &$refsub($w1, 1 + $pos, \$np, $lss) . substr($sps, $sps_len - $lss); } else { $tmp = &$refsub($w1, 1 + $pos, \$np, $sps_len) . $sps; } $result .= ($tmp =~ s/^ /$sp1/) ? "$LF$tmp" : "$sp1$tmp"; } } return $trailing_crlf ? $result . $trailing_crlf : $result; } # add encoded-word (for structured header) # parameters: # sp : start position (indentation of the first line) # ep : end position of last line (call by reference) # rll : room of last line (default: 0) sub add_ew_sh { my ($str, $sp, $ep, $rll) = @_; our $ADD_EW; our $REG_RP; my ($lp, $rp); # '(' & ')' : left/right parenthesis my ($lp_len, $rp_len) = (0, 0); my $tmp; if ($str =~ s/^(\({1,3})//) { $lp = $1; $lp_len = length($lp); $sp += $lp_len; } if ($str =~ /$REG_RP/) { $rp = $1; $rp_len = length($rp); $rll = $rp_len; substr($str, -$rp_len) = ''; } $tmp = &$ADD_EW($str, $sp, $ep, $rll); if ($lp_len > 0) { if ($tmp !~ s/^ / $lp/) { $tmp = $lp . $tmp; } } if ($rp_len > 0) { $tmp .= $rp; $$ep += $rp_len; } return $tmp; } # add encoded-word for "B" encoding sub add_ew_b { my ($str, $sp, $ep, $rll, $fof) = @_; return '' if $str eq ''; our $LF; # line feed our $BPL; # bytes per line our $HEAD; # head string our $HTL; # head + tail length our $UTF8; our $REG_W; my $str_len = length($str); # encoded size + sp my $ep_v = int(($str_len + 2) / 3) * 4 + $HTL + $sp; if ($ep_v + $rll <= $BPL) { $$ep = $ep_v; return $HEAD . encode_base64($str, '') . TAIL; } my $result = ''; my $w; utf8::decode($str) if $UTF8; # UTF8 flag on if ($ep_v <= $BPL) { $str =~ s/$REG_W$//; $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $$ep = int((length($w) + 2) / 3) * 4 + $HTL + 1; # 1 is space utf8::encode($str) if $UTF8; # UTF8 flag off $result = ($str eq '') ? ' ' : $HEAD . encode_base64($str, '') . TAIL . "$LF "; return $result . $HEAD . encode_base64($w, '') . TAIL; } my ($chunk, $chunk_len) = ('', 0); my $w_len; my $str_pos = 0; my $max_len = int(($BPL - $HTL - $sp) / 4) * 3; my $max_len2 = int(($BPL - $HTL - 1) / 4) * 3; while ($str =~ /$REG_W/g) { $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_len = length($w); # size of one character if ($chunk_len + $w_len > $max_len) { if ($chunk_len == 0) { # size over at the first time $result = ' '; return $result if $fof; } else { $result .= $HEAD . encode_base64($chunk, '') . TAIL . "$LF "; } $str_pos += $chunk_len; # encoded size (1 is space) $ep_v = int(($str_len - $str_pos + 2) / 3) * 4 + $HTL + 1; if ($ep_v + $rll <= $BPL) { utf8::encode($str) if $UTF8; # UTF8 flag off $chunk = substr($str, $str_pos); last; } if ($ep_v <= $BPL) { $str =~ s/$REG_W$//; $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_len = length($w); utf8::encode($str) if $UTF8; # UTF8 flag off $chunk = substr($str, $str_pos); $result .= $HEAD . encode_base64($chunk, '') . TAIL . "$LF "; $ep_v = int(($w_len + 2) / 3) * 4 + $HTL + 1; # 1 is space $chunk = $w; last; } $chunk = $w; $chunk_len = $w_len; $max_len = $max_len2; } else { $chunk .= $w; $chunk_len += $w_len; } } $$ep = $ep_v; return $result . $HEAD . encode_base64($chunk, '') . TAIL; } # add encoded-word for "Q" encoding sub add_ew_q { my ($str, $sp, $ep, $rll, $fof) = @_; return '' if $str eq ''; our $LF; # line feed our $BPL; # bytes per line our $MODE; # unstructured : 0, structured : 1 our $HEAD; # head string our $HTL; # head + tail length our $UTF8; our $REG_W; # '.' is added to invalidate RFC 2045 6.7.(3) my $qstr = encode_qp($str . '.', ''); local *qlen; chop($qstr); # cut '.' $qstr =~ s/_/=5F/g; $qstr =~ tr/ /_/; $qstr =~ s/\t/=09/g; if ($MODE) { # structured $qstr =~ s/([^\w\!\*\+\-\/\=])/sprintf("=%X",ord($1))/ego; *qlen = sub { my $str = shift; return length($str) * 3 - ($str =~ tr/ A-Za-z0-9\!\*\+\-\///) * 2; }; } else { # unstructured $qstr =~ s/\?/=3F/g; *qlen = sub { my $str = shift; return length($str) * 3 - ($str =~ tr/ -\<\>\@-\^\`-\~//) * 2; }; } my $ep_v = length($qstr) + $HTL + $sp; if ($ep_v + $rll <= $BPL) { $$ep = $ep_v; return $HEAD . $qstr . TAIL; } utf8::decode($str) if $UTF8; # UTF8 flag on my $result = ''; my $chunk_qlen = 0; my $w_qlen; my $enc_len; my $w; if ($ep_v <= $BPL) { $str =~ s/$REG_W$//; $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_qlen = qlen($w); $$ep = $w_qlen + $HTL + 1; # 1 is space $result = ($str eq '') ? ' ' : $HEAD . substr($qstr, 0, -$w_qlen, '') . TAIL . "$LF "; return $result . $HEAD . $qstr . TAIL; } my $max_len = $BPL - $HTL - $sp; my $max_len2 = $BPL - $HTL - 1; while ($str =~ /$REG_W/g) { $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_qlen = qlen($w); if ($chunk_qlen + $w_qlen > $max_len) { if ($chunk_qlen == 0) { # size over at the first time $result = ' '; return $result if $fof; } else { $result .= $HEAD . substr($qstr, 0, $chunk_qlen, '') . TAIL . "$LF "; } $ep_v = length($qstr) + $HTL + 1; # 1 is space if ($ep_v + $rll <= $BPL) { last; } if ($ep_v <= $BPL) { $str =~ s/$REG_W$//; $w = $1; utf8::encode($w) if $UTF8; # UTF8 flag off $w_qlen = qlen($w); $result .= $HEAD . substr($qstr, 0, -$w_qlen, '') . TAIL . "$LF "; $ep_v = $w_qlen + $HTL + 1; # 1 is space last; } $chunk_qlen = $w_qlen; $max_len = $max_len2; } else { $chunk_qlen += $w_qlen; } } $$ep = $ep_v; return $result . $HEAD . $qstr . TAIL; } sub mime_deco { my $str = shift; my $cb = shift; my ($charset, $lang, $b_enc, $q_enc); my $result = ''; my $enc = 0; my $w_bak = ''; my $sp_len = 0; my ($lp, $rp); # '(' & ')' : left/right parenthesis my $reg_ew = qr{^ =\? ([-0-9A-Za-z_]+) # charset (?:\*([A-Za-z]{1,8} # language (?:-[A-Za-z]{1,8})*))? # (RFC 2231 section 5) \? (?: [Bb]\?([0-9A-Za-z\+\/]+={0,2})\?= # "B" encoding | [Qq]\?([\x21-\x3e\x40-\x7e]+)\?= # "Q" encoding ) $}x; my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/); $str =~ tr/\n\r//d; if ($cb) { for my $w (split /([\s]+)/, $str) { $w =~ s/^(\(*)//; $lp = $1; $w =~ s/(\)*)$//; $rp = $1; if ($w =~ qr/$reg_ew/o) { ($charset, $lang, $b_enc, $q_enc) = ($1, $2, $3, $4); $lang = '' unless defined $lang; substr($result, -$sp_len) = "" if ($enc and !$lp); if (defined $q_enc) { $q_enc =~ tr/_/ /; $result .= $lp . &$cb($w, $charset, $lang, decode_qp($q_enc)) . $rp; } else { $result .= $lp . &$cb($w, $charset, $lang, decode_base64($b_enc)) . $rp; } $enc = 1; } else { if ($enc) { if ($w =~ /^\s+$/) { $sp_len = length($w); } else { $enc = 0; } } $result .= $lp . $w . $rp; } } } else { my $cs1 = ''; my $res_cs1 = ''; my $res_lang1 = ''; for my $w (split /([\s]+)/, $str) { $w =~ s/^(\(*)//; $lp = $1; $w =~ s/(\)*)$//; $rp = $1; if ($w =~ qr/$reg_ew/o) { ($charset, $lang, $b_enc, $q_enc) = ($1, $2, $3, $4); if ($charset !~ /^US-ASCII$/i) { if ($cs1) { if ($cs1 ne lc($charset)) { $result .= $w; $enc = 0; next; } } else { $cs1 = lc($charset); $res_cs1 = $charset || ''; $res_lang1 = $lang || ''; } } substr($result, -$sp_len) = "" if ($enc and !$lp); if (defined $q_enc) { $q_enc =~ tr/_/ /; $result .= $lp . decode_qp($q_enc) . $rp; } else { $result .= $lp . decode_base64($b_enc) . $rp; } $enc = $rp ? 0 : 1; } else { if ($enc) { if ($w =~ /^\s+$/) { $sp_len = length($w); } else { $enc = 0; } } $result .= $lp . $w . $rp; } } if ($cs1 eq 'iso-2022-jp') { # remove redundant ESC sequences $result =~ s/(\e..)([^\e]+)\e\(B(?=\1)/$1$2\n/g; $result =~ s/\n\e..//g; $result =~ s/\e\(B(\e..)/$1/g; } if (wantarray) { return ($trailing_crlf ? $result . $trailing_crlf : $result, $res_cs1, $res_lang1); } } return $trailing_crlf ? $result . $trailing_crlf : $result; } 1; MIME-EcoEncode-0.95/Changes0000644000076400007640000000434512241050270015026 0ustar murata0murata0Revision history for Perl extension MIME::EcoEncode. 0.95 2013-11-14 - Improved performance - Added sub modules: MIME::EcoEncode::Param, MIME::EcoEncode::Fold - Added support "Q" encoding of ISO-2022-JP - Added support Shift_JIS charset - Corrected the handling of CRLF of the end 0.93 2013-09-27 - Improved performance : mime_eco() 0.92 2013-09-09 - Fixed bug : mime_eco() (in the case of very long whitespace is contained) 0.91 2013-08-29 - Fixed bug : "B" encoding of ISO-2022-JP (in the case of Hankaku-Katakana is contained) - Improved performance 0.90 2013-08-28 - Fixed bug : "B" encoding of ISO-2022-JP - Fixed bug : "Q" encoding (in the case of TAB is contained, etc.) - Fixed bug : "Q" decoding (in the case of parenthesis is contained) - Added code to remove redundant ESC sequences in ISO-2022-JP - Added code to return array in mime_deco() 0.82 2012-10-17 - Fixed bug : in the case only of ASCII character string Thank you, SASAO-Sama 0.81 2012-02-14 - Fixed bug : "B" encoding of UTF-8 0.80 2012-02-02 - Added subroutine mime_deco() - Added support RFC2231's language - Added support "Q" encoding about all charset except ISO-2022-JP - Added support all single-byte charsets 0.70 2012-01-19 - Added support the following charset: ISO-8859-xx, GB2312, EUC-KR, Big5 - Added support "Q" encoding (only UTF-8 and ISO-8859-xx) - Added support the charset name of lowercase letters 0.61 2012-01-13 - Improved performance - Corrected the mistakes 0.60 2012-01-05 - Added option : $lss - Added support 'comment' in 'comment' (max depth is 3) - Fixed bug 0.50 2011-12-21 - Improved encoding for continuous space/tab character - Added support for recognizing 'comment's in structured field bodies - Added option : $mode - Deleted global variable : $MIME::EcoEncode::JCODE_COMPAT 0.34 2011-11-30 - improve performance : add_enc_word_utf8() 0.33 2011-11-29 - bug fix & improve performance : add_enc_word_utf8() 0.32 2011-11-16 - support options : $lf and $bpl 0.31 2011-10-12 - update Makefile.PL (added LICENSE) - use "make dist" command for tarball 0.30 2011-10-11 - rename : MimeEco -> MIME::EcoEncode 0.20 2011-03-28 - improve performance 0.10 2011-03-18 - First release MIME-EcoEncode-0.95/MANIFEST0000644000076400007640000000072512241053107014664 0ustar murata0murata0Changes lib/MIME/EcoEncode.pm lib/MIME/EcoEncode.pod lib/MIME/EcoEncode/Fold.pm lib/MIME/EcoEncode/Fold.pod lib/MIME/EcoEncode/JP_B.pm lib/MIME/EcoEncode/JP_Q.pm lib/MIME/EcoEncode/Param.pm lib/MIME/EcoEncode/Param.pod Makefile.PL MANIFEST This list of files README t/EcoEncode.t t/Fold.t t/Param.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker)