← Index
NYTProf Performance Profile   « line view »
For nd2: #2 manager: init
  Run on Thu May 2 17:38:58 2019
Reported on Thu May 2 17:40:49 2019

Filename/usr/lib/perl5/5.18.2/x86_64-linux-thread-multi/Encode/Alias.pm
StatementsExecuted 165 statements in 215µs
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
3311223µs223µsEncode::Alias::::find_aliasEncode::Alias::find_alias
0000s0sEncode::Alias::::BEGINEncode::Alias::BEGIN
0000s0sEncode::Alias::::define_aliasEncode::Alias::define_alias
0000s0sEncode::Alias::::init_aliasesEncode::Alias::init_aliases
0000s0sEncode::Alias::::undef_aliasesEncode::Alias::undef_aliases
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package Encode::Alias;
2use strict;
3use warnings;
4no warnings 'redefine';
5our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
7
8use base qw(Exporter);
9
10# Public, encouraged API is exported by default
11
12our @EXPORT =
13 qw (
14 define_alias
15 find_alias
16);
17
18our @Alias; # ordered matching list
19our %Alias; # cached known aliases
20
21
# spent 223µs within Encode::Alias::find_alias which was called 33 times, avg 7µs/call: # 33 times (223µs+0s) by Encode::getEncoding at line 113 of Encode.pm, avg 7µs/call
sub find_alias {
223364µs require Encode;
233312µs my $class = shift;
243312µs my $find = shift;
253340µs unless ( exists $Alias{$find} ) {
26 $Alias{$find} = undef; # Recursion guard
27 for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
28 my $alias = $Alias[$i];
29 my $val = $Alias[ $i + 1 ];
30 my $new;
31 if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
32 DEBUG and warn "eval $val";
33 $new = eval $val;
34 DEBUG and $@ and warn "$val, $@";
35 }
36 elsif ( ref($alias) eq 'CODE' ) {
37 DEBUG and warn "$alias", "->", "($find)";
38 $new = $alias->($find);
39 }
40 elsif ( lc($find) eq lc($alias) ) {
41 $new = $val;
42 }
43 if ( defined($new) ) {
44 next if $new eq $find; # avoid (direct) recursion on bugs
45 DEBUG and warn "$alias, $new";
46 my $enc =
47 ( ref($new) ) ? $new : Encode::find_encoding($new);
48 if ($enc) {
49 $Alias{$find} = $enc;
50 last;
51 }
52 }
53 }
54
55 # case insensitive search when canonical is not in all lowercase
56 # RT ticket #7835
57 unless ( $Alias{$find} ) {
58 my $lcfind = lc($find);
59 for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
60 {
61 $lcfind eq lc($name) or next;
62 $Alias{$find} = Encode::find_encoding($name);
63 DEBUG and warn "$find => $name";
64 }
65 }
66 }
67 if (DEBUG) {
68 my $name;
69 if ( my $e = $Alias{$find} ) {
70 $name = $e->name;
71 }
72 else {
73 $name = "";
74 }
75 warn "find_alias($class, $find)->name = $name";
76 }
773386µs return $Alias{$find};
78}
79
80sub define_alias {
81 while (@_) {
82 my ( $alias, $name ) = splice( @_, 0, 2 );
83 unshift( @Alias, $alias => $name ); # newer one has precedence
84 if ( ref($alias) ) {
85
86 # clear %Alias cache to allow overrides
87 my @a = keys %Alias;
88 for my $k (@a) {
89 if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
90 DEBUG and warn "delete \$Alias\{$k\}";
91 delete $Alias{$k};
92 }
93 elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
94 DEBUG and warn "delete \$Alias\{$k\}";
95 delete $Alias{$k};
96 }
97 }
98 }
99 else {
100 DEBUG and warn "delete \$Alias\{$alias\}";
101 delete $Alias{$alias};
102 }
103 }
104}
105
106# Allow latin-1 style names as well
107# 0 1 2 3 4 5 6 7 8 9 10
108our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
109
110# Allow winlatin1 style names as well
111our %Winlatin2cp = (
112 'latin1' => 1252,
113 'latin2' => 1250,
114 'cyrillic' => 1251,
115 'greek' => 1253,
116 'turkish' => 1254,
117 'hebrew' => 1255,
118 'arabic' => 1256,
119 'baltic' => 1257,
120 'vietnamese' => 1258,
121);
122
123init_aliases();
124
125sub undef_aliases {
126 @Alias = ();
127 %Alias = ();
128}
129
130sub init_aliases {
131 require Encode;
132 undef_aliases();
133
134 # Try all-lower-case version should all else fails
135 define_alias( qr/^(.*)$/ => '"\L$1"' );
136
137 # UTF/UCS stuff
138 define_alias( qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"' );
139 define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
140 define_alias(
141 qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
142 qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
143 qr/^iso-10646-1$/i => '"UCS-2BE"'
144 );
145 define_alias(
146 qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
147 qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
148 qr/^UTF-?(16|32)$/i => '"UTF-$1"',
149 );
150
151 # ASCII
152 define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
153 define_alias( 'C' => 'ascii' );
154 define_alias( qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"' );
155
156 # Allow variants of iso-8859-1 etc.
157 define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
158
159 # At least HP-UX has these.
160 define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
161
162 # More HP stuff.
163 define_alias(
164 qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
165 '"${1}8"' );
166
167 # The Official name of ASCII.
168 define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
169
170 # This is a font issue, not an encoding issue.
171 # (The currency symbol of the Latin 1 upper half
172 # has been redefined as the euro symbol.)
173 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
174
175 define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
176'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
177 );
178
179 define_alias(
180 qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
181 hebrew|arabic|baltic|vietnamese)$/ix =>
182 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
183 );
184
185 # Common names for non-latin preferred MIME names
186 define_alias(
187 'ascii' => 'US-ascii',
188 'cyrillic' => 'iso-8859-5',
189 'arabic' => 'iso-8859-6',
190 'greek' => 'iso-8859-7',
191 'hebrew' => 'iso-8859-8',
192 'thai' => 'iso-8859-11',
193 );
194 # RT #20781
195 define_alias(qr/\btis-?620\b/i => '"iso-8859-11"');
196
197 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
198 # And Microsoft has their own naming (again, surprisingly).
199 # And windows-* is registered in IANA!
200 define_alias(
201 qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
202
203 # Sometimes seen with a leading zero.
204 # define_alias( qr/\bcp037\b/i => '"cp37"');
205
206 # Mac Mappings
207 # predefined in *.ucm; unneeded
208 # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
209 define_alias( qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"' );
210 # http://rt.cpan.org/Ticket/Display.html?id=36326
211 define_alias( qr/^macintosh$/i => '"MacRoman"' );
212 # https://rt.cpan.org/Ticket/Display.html?id=78125
213 define_alias( qr/^macce$/i => '"MacCentralEurRoman"' );
214 # Ououououou. gone. They are differente!
215 # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
216
217 # Standardize on the dashed versions.
218 define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
219
220 unless ($Encode::ON_EBCDIC) {
221
222 # for Encode::CN
223 define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
224 define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
225
226 # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
227 # CP936 doesn't have vendor-addon for GBK, so they're identical.
228 define_alias( qr/^gbk$/i => '"cp936"' );
229
230 # This fixes gb2312 vs. euc-cn confusion, practically
231 define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
232
233 # for Encode::JP
234 define_alias( qr/\bjis$/i => '"7bit-jis"' );
235 define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
236 define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
237 define_alias( qr/\bujis$/i => '"euc-jp"' );
238 define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
239 define_alias( qr/\bsjis$/i => '"shiftjis"' );
240 define_alias( qr/\bwindows-31j$/i => '"cp932"' );
241
242 # for Encode::KR
243 define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
244 define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
245
246 # This fixes ksc5601 vs. euc-kr confusion, practically
247 define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
248 define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
249 define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
250
251 # for Encode::TW
252 define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
253 define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
254 define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
255 define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
256 define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
257 }
258
259 # utf8 is blessed :)
260 define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' );
261
262 # At last, Map white space and _ to '-'
263 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
264}
265
2661;
267__END__