| Filename | /usr/lib/perl5/5.18.2/x86_64-linux-thread-multi/Encode/Alias.pm |
| Statements | Executed 165 statements in 215µs |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 33 | 1 | 1 | 223µs | 223µs | Encode::Alias::find_alias |
| 0 | 0 | 0 | 0s | 0s | Encode::Alias::BEGIN |
| 0 | 0 | 0 | 0s | 0s | Encode::Alias::define_alias |
| 0 | 0 | 0 | 0s | 0s | Encode::Alias::init_aliases |
| 0 | 0 | 0 | 0s | 0s | Encode::Alias::undef_aliases |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package Encode::Alias; | ||||
| 2 | use strict; | ||||
| 3 | use warnings; | ||||
| 4 | no warnings 'redefine'; | ||||
| 5 | our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; | ||||
| 6 | use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; | ||||
| 7 | |||||
| 8 | use base qw(Exporter); | ||||
| 9 | |||||
| 10 | # Public, encouraged API is exported by default | ||||
| 11 | |||||
| 12 | our @EXPORT = | ||||
| 13 | qw ( | ||||
| 14 | define_alias | ||||
| 15 | find_alias | ||||
| 16 | ); | ||||
| 17 | |||||
| 18 | our @Alias; # ordered matching list | ||||
| 19 | our %Alias; # cached known aliases | ||||
| 20 | |||||
| 21 | # spent 223µs within Encode::Alias::find_alias which was called 33 times, avg 7µs/call:
# 33 times (223µs+0s) by Encode::getEncoding at line 113 of Encode.pm, avg 7µs/call | ||||
| 22 | 33 | 64µs | require Encode; | ||
| 23 | 33 | 12µs | my $class = shift; | ||
| 24 | 33 | 12µs | my $find = shift; | ||
| 25 | 33 | 40µs | unless ( exists $Alias{$find} ) { | ||
| 26 | $Alias{$find} = undef; # Recursion guard | ||||
| 27 | for ( my $i = 0 ; $i < @Alias ; $i += 2 ) { | ||||
| 28 | my $alias = $Alias[$i]; | ||||
| 29 | my $val = $Alias[ $i + 1 ]; | ||||
| 30 | my $new; | ||||
| 31 | if ( ref($alias) eq 'Regexp' && $find =~ $alias ) { | ||||
| 32 | DEBUG and warn "eval $val"; | ||||
| 33 | $new = eval $val; | ||||
| 34 | DEBUG and $@ and warn "$val, $@"; | ||||
| 35 | } | ||||
| 36 | elsif ( ref($alias) eq 'CODE' ) { | ||||
| 37 | DEBUG and warn "$alias", "->", "($find)"; | ||||
| 38 | $new = $alias->($find); | ||||
| 39 | } | ||||
| 40 | elsif ( lc($find) eq lc($alias) ) { | ||||
| 41 | $new = $val; | ||||
| 42 | } | ||||
| 43 | if ( defined($new) ) { | ||||
| 44 | next if $new eq $find; # avoid (direct) recursion on bugs | ||||
| 45 | DEBUG and warn "$alias, $new"; | ||||
| 46 | my $enc = | ||||
| 47 | ( ref($new) ) ? $new : Encode::find_encoding($new); | ||||
| 48 | if ($enc) { | ||||
| 49 | $Alias{$find} = $enc; | ||||
| 50 | last; | ||||
| 51 | } | ||||
| 52 | } | ||||
| 53 | } | ||||
| 54 | |||||
| 55 | # case insensitive search when canonical is not in all lowercase | ||||
| 56 | # RT ticket #7835 | ||||
| 57 | unless ( $Alias{$find} ) { | ||||
| 58 | my $lcfind = lc($find); | ||||
| 59 | for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule ) | ||||
| 60 | { | ||||
| 61 | $lcfind eq lc($name) or next; | ||||
| 62 | $Alias{$find} = Encode::find_encoding($name); | ||||
| 63 | DEBUG and warn "$find => $name"; | ||||
| 64 | } | ||||
| 65 | } | ||||
| 66 | } | ||||
| 67 | if (DEBUG) { | ||||
| 68 | my $name; | ||||
| 69 | if ( my $e = $Alias{$find} ) { | ||||
| 70 | $name = $e->name; | ||||
| 71 | } | ||||
| 72 | else { | ||||
| 73 | $name = ""; | ||||
| 74 | } | ||||
| 75 | warn "find_alias($class, $find)->name = $name"; | ||||
| 76 | } | ||||
| 77 | 33 | 86µs | return $Alias{$find}; | ||
| 78 | } | ||||
| 79 | |||||
| 80 | sub define_alias { | ||||
| 81 | while (@_) { | ||||
| 82 | my ( $alias, $name ) = splice( @_, 0, 2 ); | ||||
| 83 | unshift( @Alias, $alias => $name ); # newer one has precedence | ||||
| 84 | if ( ref($alias) ) { | ||||
| 85 | |||||
| 86 | # clear %Alias cache to allow overrides | ||||
| 87 | my @a = keys %Alias; | ||||
| 88 | for my $k (@a) { | ||||
| 89 | if ( ref($alias) eq 'Regexp' && $k =~ $alias ) { | ||||
| 90 | DEBUG and warn "delete \$Alias\{$k\}"; | ||||
| 91 | delete $Alias{$k}; | ||||
| 92 | } | ||||
| 93 | elsif ( ref($alias) eq 'CODE' && $alias->($k) ) { | ||||
| 94 | DEBUG and warn "delete \$Alias\{$k\}"; | ||||
| 95 | delete $Alias{$k}; | ||||
| 96 | } | ||||
| 97 | } | ||||
| 98 | } | ||||
| 99 | else { | ||||
| 100 | DEBUG and warn "delete \$Alias\{$alias\}"; | ||||
| 101 | delete $Alias{$alias}; | ||||
| 102 | } | ||||
| 103 | } | ||||
| 104 | } | ||||
| 105 | |||||
| 106 | # Allow latin-1 style names as well | ||||
| 107 | # 0 1 2 3 4 5 6 7 8 9 10 | ||||
| 108 | our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); | ||||
| 109 | |||||
| 110 | # Allow winlatin1 style names as well | ||||
| 111 | our %Winlatin2cp = ( | ||||
| 112 | 'latin1' => 1252, | ||||
| 113 | 'latin2' => 1250, | ||||
| 114 | 'cyrillic' => 1251, | ||||
| 115 | 'greek' => 1253, | ||||
| 116 | 'turkish' => 1254, | ||||
| 117 | 'hebrew' => 1255, | ||||
| 118 | 'arabic' => 1256, | ||||
| 119 | 'baltic' => 1257, | ||||
| 120 | 'vietnamese' => 1258, | ||||
| 121 | ); | ||||
| 122 | |||||
| 123 | init_aliases(); | ||||
| 124 | |||||
| 125 | sub undef_aliases { | ||||
| 126 | @Alias = (); | ||||
| 127 | %Alias = (); | ||||
| 128 | } | ||||
| 129 | |||||
| 130 | sub init_aliases { | ||||
| 131 | require Encode; | ||||
| 132 | undef_aliases(); | ||||
| 133 | |||||
| 134 | # Try all-lower-case version should all else fails | ||||
| 135 | define_alias( qr/^(.*)$/ => '"\L$1"' ); | ||||
| 136 | |||||
| 137 | # UTF/UCS stuff | ||||
| 138 | define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' ); | ||||
| 139 | define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' ); | ||||
| 140 | define_alias( | ||||
| 141 | qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"', | ||||
| 142 | qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")', | ||||
| 143 | qr/^iso-10646-1$/i => '"UCS-2BE"' | ||||
| 144 | ); | ||||
| 145 | define_alias( | ||||
| 146 | qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"', | ||||
| 147 | qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"', | ||||
| 148 | qr/^UTF-?(16|32)$/i => '"UTF-$1"', | ||||
| 149 | ); | ||||
| 150 | |||||
| 151 | # ASCII | ||||
| 152 | define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' ); | ||||
| 153 | define_alias( 'C' => 'ascii' ); | ||||
| 154 | define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' ); | ||||
| 155 | |||||
| 156 | # Allow variants of iso-8859-1 etc. | ||||
| 157 | define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); | ||||
| 158 | |||||
| 159 | # At least HP-UX has these. | ||||
| 160 | define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' ); | ||||
| 161 | |||||
| 162 | # More HP stuff. | ||||
| 163 | define_alias( | ||||
| 164 | qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => | ||||
| 165 | '"${1}8"' ); | ||||
| 166 | |||||
| 167 | # The Official name of ASCII. | ||||
| 168 | define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); | ||||
| 169 | |||||
| 170 | # This is a font issue, not an encoding issue. | ||||
| 171 | # (The currency symbol of the Latin 1 upper half | ||||
| 172 | # has been redefined as the euro symbol.) | ||||
| 173 | define_alias( qr/^(.+)\@euro$/i => '"$1"' ); | ||||
| 174 | |||||
| 175 | define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i => | ||||
| 176 | 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' | ||||
| 177 | ); | ||||
| 178 | |||||
| 179 | define_alias( | ||||
| 180 | qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish| | ||||
| 181 | hebrew|arabic|baltic|vietnamese)$/ix => | ||||
| 182 | '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' | ||||
| 183 | ); | ||||
| 184 | |||||
| 185 | # Common names for non-latin preferred MIME names | ||||
| 186 | define_alias( | ||||
| 187 | 'ascii' => 'US-ascii', | ||||
| 188 | 'cyrillic' => 'iso-8859-5', | ||||
| 189 | 'arabic' => 'iso-8859-6', | ||||
| 190 | 'greek' => 'iso-8859-7', | ||||
| 191 | 'hebrew' => 'iso-8859-8', | ||||
| 192 | 'thai' => 'iso-8859-11', | ||||
| 193 | ); | ||||
| 194 | # RT #20781 | ||||
| 195 | define_alias(qr/\btis-?620\b/i => '"iso-8859-11"'); | ||||
| 196 | |||||
| 197 | # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. | ||||
| 198 | # And Microsoft has their own naming (again, surprisingly). | ||||
| 199 | # And windows-* is registered in IANA! | ||||
| 200 | define_alias( | ||||
| 201 | qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' ); | ||||
| 202 | |||||
| 203 | # Sometimes seen with a leading zero. | ||||
| 204 | # define_alias( qr/\bcp037\b/i => '"cp37"'); | ||||
| 205 | |||||
| 206 | # Mac Mappings | ||||
| 207 | # predefined in *.ucm; unneeded | ||||
| 208 | # define_alias( qr/\bmacIcelandic$/i => '"macIceland"'); | ||||
| 209 | define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' ); | ||||
| 210 | # http://rt.cpan.org/Ticket/Display.html?id=36326 | ||||
| 211 | define_alias( qr/^macintosh$/i => '"MacRoman"' ); | ||||
| 212 | # https://rt.cpan.org/Ticket/Display.html?id=78125 | ||||
| 213 | define_alias( qr/^macce$/i => '"MacCentralEurRoman"' ); | ||||
| 214 | # Ououououou. gone. They are differente! | ||||
| 215 | # define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); | ||||
| 216 | |||||
| 217 | # Standardize on the dashed versions. | ||||
| 218 | define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' ); | ||||
| 219 | |||||
| 220 | unless ($Encode::ON_EBCDIC) { | ||||
| 221 | |||||
| 222 | # for Encode::CN | ||||
| 223 | define_alias( qr/\beuc.*cn$/i => '"euc-cn"' ); | ||||
| 224 | define_alias( qr/\bcn.*euc$/i => '"euc-cn"' ); | ||||
| 225 | |||||
| 226 | # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' ) | ||||
| 227 | # CP936 doesn't have vendor-addon for GBK, so they're identical. | ||||
| 228 | define_alias( qr/^gbk$/i => '"cp936"' ); | ||||
| 229 | |||||
| 230 | # This fixes gb2312 vs. euc-cn confusion, practically | ||||
| 231 | define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' ); | ||||
| 232 | |||||
| 233 | # for Encode::JP | ||||
| 234 | define_alias( qr/\bjis$/i => '"7bit-jis"' ); | ||||
| 235 | define_alias( qr/\beuc.*jp$/i => '"euc-jp"' ); | ||||
| 236 | define_alias( qr/\bjp.*euc$/i => '"euc-jp"' ); | ||||
| 237 | define_alias( qr/\bujis$/i => '"euc-jp"' ); | ||||
| 238 | define_alias( qr/\bshift.*jis$/i => '"shiftjis"' ); | ||||
| 239 | define_alias( qr/\bsjis$/i => '"shiftjis"' ); | ||||
| 240 | define_alias( qr/\bwindows-31j$/i => '"cp932"' ); | ||||
| 241 | |||||
| 242 | # for Encode::KR | ||||
| 243 | define_alias( qr/\beuc.*kr$/i => '"euc-kr"' ); | ||||
| 244 | define_alias( qr/\bkr.*euc$/i => '"euc-kr"' ); | ||||
| 245 | |||||
| 246 | # This fixes ksc5601 vs. euc-kr confusion, practically | ||||
| 247 | define_alias( qr/(?:x-)?uhc$/i => '"cp949"' ); | ||||
| 248 | define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' ); | ||||
| 249 | define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' ); | ||||
| 250 | |||||
| 251 | # for Encode::TW | ||||
| 252 | define_alias( qr/\bbig-?5$/i => '"big5-eten"' ); | ||||
| 253 | define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' ); | ||||
| 254 | define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' ); | ||||
| 255 | define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' ); | ||||
| 256 | define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' ); | ||||
| 257 | } | ||||
| 258 | |||||
| 259 | # utf8 is blessed :) | ||||
| 260 | define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' ); | ||||
| 261 | |||||
| 262 | # At last, Map white space and _ to '-' | ||||
| 263 | define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); | ||||
| 264 | } | ||||
| 265 | |||||
| 266 | 1; | ||||
| 267 | __END__ |