pictcode / lib / Cake / I18n / Multibyte.php @ 0b1b8047
履歴 | 表示 | アノテート | ダウンロード (25.107 KB)
1 | 635eef61 | spyder1211 | <?php
|
---|---|---|---|
2 | /**
|
||
3 | * Multibyte handling methods.
|
||
4 | *
|
||
5 | * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
|
||
6 | * Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
|
||
7 | *
|
||
8 | * Licensed under The MIT License
|
||
9 | * For full copyright and license information, please see the LICENSE.txt
|
||
10 | * Redistributions of files must retain the above copyright notice.
|
||
11 | *
|
||
12 | * @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
|
||
13 | * @link http://cakephp.org CakePHP(tm) Project
|
||
14 | * @package Cake.I18n
|
||
15 | * @since CakePHP(tm) v 1.2.0.6833
|
||
16 | * @license http://www.opensource.org/licenses/mit-license.php MIT License
|
||
17 | */
|
||
18 | |||
19 | /**
|
||
20 | * Multibyte handling methods.
|
||
21 | *
|
||
22 | * @package Cake.I18n
|
||
23 | */
|
||
24 | class Multibyte { |
||
25 | |||
26 | /**
|
||
27 | * Holds the case folding values
|
||
28 | *
|
||
29 | * @var array
|
||
30 | */
|
||
31 | protected static $_caseFold = array(); |
||
32 | |||
33 | /**
|
||
34 | * Holds an array of Unicode code point ranges
|
||
35 | *
|
||
36 | * @var array
|
||
37 | */
|
||
38 | protected static $_codeRange = array(); |
||
39 | |||
40 | /**
|
||
41 | * Holds the current code point range
|
||
42 | *
|
||
43 | * @var string|null
|
||
44 | */
|
||
45 | protected static $_table = null; |
||
46 | |||
/**
 * Converts a UTF-8 encoded string into an array of Unicode code points.
 *
 * Bytes below 128 are copied through as-is. For every other character the lead
 * byte decides whether two, three or four bytes are combined into one code
 * point. Continuation bytes are not validated, so malformed input produces
 * unspecified values instead of an error.
 *
 * @param string $string String to convert.
 * @return array List of integer code points, one entry per decoded character.
 */
public static function utf8($string) {
    $map = array();

    $values = array();
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // Lead byte: 110xxxxx starts 2 bytes, 1110xxxx 3 bytes, 11110xxx 4 bytes.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) !== $find) {
            continue;
        }

        if ($find === 4) {
            // 3 payload bits from the lead byte, 6 from each continuation byte.
            $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096) +
                (($values[2] % 64) * 64) + ($values[3] % 64);
        } elseif ($find === 3) {
            $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
        } else {
            $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
        }
        $values = array();
        $find = 1;
    }
    return $map;
}
||
85 | |||
/**
 * Converts an array of Unicode code points back into a UTF-8 encoded string.
 *
 * Despite its name the result is not limited to ASCII: values below 128 become
 * a single byte, values below 2048 two bytes, values below 65536 three bytes
 * and anything larger four bytes.
 *
 * @param array $array Values array (integer code points).
 * @return string
 */
public static function ascii($array) {
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            $ascii .= chr(0xC0 | ($utf8 >> 6));
            $ascii .= chr(0x80 | ($utf8 & 0x3F));
        } elseif ($utf8 < 65536) {
            $ascii .= chr(0xE0 | ($utf8 >> 12));
            $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
            $ascii .= chr(0x80 | ($utf8 & 0x3F));
        } else {
            // Supplementary planes need a four byte sequence; the three byte
            // formula would overflow the lead byte.
            $ascii .= chr(0xF0 | (($utf8 >> 18) & 0x07));
            $ascii .= chr(0x80 | (($utf8 >> 12) & 0x3F));
            $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
            $ascii .= chr(0x80 | ($utf8 & 0x3F));
        }
    }
    return $ascii;
}
||
110 | |||
/**
 * Case-insensitive search for the first occurrence of a string.
 *
 * A haystack without multibyte characters is handed to PHP's native stripos().
 * Otherwise both strings are upper-cased with Multibyte::strtoupper() and the
 * lookup is delegated to Multibyte::strpos().
 *
 * @param string $haystack The string from which to get the position of the first occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param int $offset The position in $haystack to start searching.
 * @return int|bool The numeric position of the first occurrence of $needle in the $haystack string,
 *    or false if $needle is not found.
 */
public static function stripos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    $upperHaystack = Multibyte::strtoupper($haystack);
    $upperNeedle = Multibyte::strtoupper($needle);

    return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
}
||
128 | |||
/**
 * Finds first occurrence of a string within another, case insensitive.
 *
 * The code point based search is used when $haystack contains multibyte
 * characters, or when $part is requested on a PHP version older than 5.3
 * (whose native stristr() has no third argument). Everything else is delegated
 * to the native stristr().
 *
 * In the code point branch the comparison runs on upper-cased copies of both
 * strings while the returned portion is cut from the original $haystack. An
 * empty $needle is never found there.
 *
 * @param string $haystack The string from which to get the first occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param bool $part Determines which portion of $haystack this function returns.
 *    If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle
 *    (an empty string when $needle starts at position 0).
 *    If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
 *    Default value is false.
 * @return string|bool The portion of $haystack, or false if $needle is not found.
 */
public static function stristr($haystack, $needle, $part = false) {
    // Comparing PHP_VERSION with a float is unreliable for version strings.
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
        $haystack = Multibyte::utf8($haystack);
        $needle = Multibyte::utf8(Multibyte::strtoupper($needle));

        $needleCount = count($needle);
        if ($needleCount === 0) {
            return false;
        }

        // Never start a comparison that would run past either array.
        $last = min(count($haystack), count($check)) - $needleCount;

        for ($position = 0; $position <= $last; $position++) {
            if (array_slice($check, $position, $needleCount) !== $needle) {
                continue;
            }
            if ($part) {
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            return Multibyte::ascii(array_slice($haystack, $position));
        }
        return false;
    }

    if (!$php) {
        return stristr($haystack, $needle, $part);
    }
    return stristr($haystack, $needle);
}
||
189 | |||
/**
 * Get string length.
 *
 * When $string contains multibyte characters the length is the number of code
 * points produced by Multibyte::utf8(); otherwise the native byte length is
 * returned.
 *
 * @param string $string The string being checked for length.
 * @return int The number of characters in string $string
 */
public static function strlen($string) {
    return Multibyte::checkMultibyte($string)
        ? count(Multibyte::utf8($string))
        : strlen($string);
}
||
203 | |||
/**
 * Find position of first occurrence of a string.
 *
 * A haystack without multibyte characters is handed to PHP's native strpos().
 * Otherwise both strings are converted to code points and the position is
 * counted in characters. In that branch an empty $needle is never found and a
 * negative $offset makes the scan start at the first character.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string to find in $haystack.
 * @param int $offset The search offset. If it is not specified, 0 is used.
 * @return int|bool The numeric position of the first occurrence of $needle in the $haystack string.
 *    If $needle is not found, it returns false.
 */
public static function strpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);
    $needleCount = count($needle);
    if ($needleCount === 0) {
        return false;
    }

    // Last index at which the whole needle still fits; bounding the scan here
    // avoids reading undefined offsets on a partial match at the end.
    $last = count($haystack) - $needleCount;

    for ($position = max(0, (int)$offset); $position <= $last; $position++) {
        if ($haystack[$position] !== $needle[0]) {
            continue;
        }
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            return $position;
        }
    }
    return false;
}
||
246 | |||
/**
 * Finds the last occurrence of a string within another.
 *
 * Both strings are converted to code points and the haystack is scanned from
 * the end for the last position at which the complete $needle matches. An
 * empty $needle is never found.
 *
 * @param string $haystack The string from which to get the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param bool $part Determines which portion of $haystack this function returns.
 *    If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle
 *    (an empty string when that occurrence starts at position 0).
 *    If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
 *    Default value is false.
 * @return string|bool The portion of $haystack. or false if $needle is not found.
 */
public static function strrchr($haystack, $needle, $part = false) {
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);

    $needleCount = count($needle);
    if ($needleCount === 0) {
        return false;
    }

    // Walk backwards so the first hit is the last occurrence.
    for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
        if (array_slice($haystack, $position, $needleCount) !== $needle) {
            continue;
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
||
306 | |||
/**
 * Finds the last occurrence of a string within another, case insensitive.
 *
 * The comparison runs on upper-cased copies (Multibyte::strtoupper()) of both
 * strings, scanning from the end for the last position at which the complete
 * $needle matches; the returned portion is cut from the original $haystack.
 * An empty $needle is never found.
 *
 * @param string $haystack The string from which to get the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param bool $part Determines which portion of $haystack this function returns.
 *    If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle
 *    (an empty string when that occurrence starts at position 0).
 *    If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
 *    Default value is false.
 * @return string|bool The portion of $haystack. or false if $needle is not found.
 */
public static function strrichr($haystack, $needle, $part = false) {
    $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));

    $needleCount = count($needle);
    if ($needleCount === 0) {
        return false;
    }

    // Stay inside both arrays in case upper-casing changed the length.
    $start = min(count($haystack), count($check)) - $needleCount;

    for ($position = $start; $position >= 0; $position--) {
        if (array_slice($check, $position, $needleCount) !== $needle) {
            continue;
        }
        if ($part) {
            return Multibyte::ascii(array_slice($haystack, 0, $position));
        }
        return Multibyte::ascii(array_slice($haystack, $position));
    }
    return false;
}
||
368 | |||
/**
 * Finds position of last occurrence of a string within another, case insensitive
 *
 * A haystack without multibyte characters is handed to PHP's native strripos().
 * Otherwise both strings are upper-cased with Multibyte::strtoupper(),
 * converted to code points and scanned from the end; positions are counted in
 * characters. In that branch an empty $needle is never found.
 *
 * @param string $haystack The string from which to get the position of the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param int $offset The position in $haystack to start searching. A positive value excludes
 *    matches starting before it; a negative value excludes matches starting after that many
 *    characters counted back from the end.
 * @return int|bool The numeric position of the last occurrence of $needle in the $haystack string,
 *    or false if $needle is not found.
 */
public static function strripos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
    $needle = Multibyte::utf8(Multibyte::strtoupper($needle));

    $haystackCount = count($haystack);
    $needleCount = count($needle);
    if ($needleCount === 0) {
        return false;
    }

    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = $offset;
    } elseif ($offset < 0) {
        $last = min($last, $haystackCount + $offset);
    }

    // Walk backwards so the first hit is the last occurrence in the window.
    for ($position = $last; $position >= $first; $position--) {
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            return $position;
        }
    }
    return false;
}
||
418 | |||
/**
 * Find position of last occurrence of a string in a string.
 *
 * A haystack without multibyte characters is handed to PHP's native strrpos().
 * Otherwise both strings are converted to code points and scanned from the
 * end; positions are counted in characters. In that branch an empty $needle is
 * never found.
 *
 * @param string $haystack The string being checked, for the last occurrence of $needle.
 * @param string $needle The string to find in $haystack.
 * @param int $offset May be specified to begin searching an arbitrary number of characters into the string.
 *    Negative values will stop searching at an arbitrary point prior to the end of the string:
 *    matches starting after that many characters counted back from the end are ignored.
 * @return int|bool The numeric position of the last occurrence of $needle in the $haystack string.
 *    If $needle is not found, it returns false.
 */
public static function strrpos($haystack, $needle, $offset = 0) {
    if (!Multibyte::checkMultibyte($haystack)) {
        return strrpos($haystack, $needle, $offset);
    }

    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);

    $haystackCount = count($haystack);
    $needleCount = count($needle);
    if ($needleCount === 0) {
        return false;
    }

    $offset = (int)$offset;
    $first = 0;
    $last = $haystackCount - $needleCount;
    if ($offset > 0) {
        $first = $offset;
    } elseif ($offset < 0) {
        $last = min($last, $haystackCount + $offset);
    }

    // Walk backwards so the first hit is the last occurrence in the window.
    for ($position = $last; $position >= $first; $position--) {
        if (array_slice($haystack, $position, $needleCount) === $needle) {
            return $position;
        }
    }
    return false;
}
||
468 | |||
/**
 * Finds first occurrence of a string within another
 *
 * The code point based search is used when $haystack contains multibyte
 * characters, or when $part is requested on a PHP version older than 5.3
 * (whose native strstr() has no third argument). Everything else is delegated
 * to the native strstr(). In the code point branch an empty $needle is never
 * found.
 *
 * @param string $haystack The string from which to get the first occurrence of $needle.
 * @param string $needle The string to find in $haystack
 * @param bool $part Determines which portion of $haystack this function returns.
 *    If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle
 *    (an empty string when $needle starts at position 0).
 *    If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
 *    Default value is FALSE.
 * @return string|bool The portion of $haystack, or false if $needle is not found.
 */
public static function strstr($haystack, $needle, $part = false) {
    // Comparing PHP_VERSION with a float is unreliable for version strings.
    $php = version_compare(PHP_VERSION, '5.3.0', '<');

    if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
        $haystack = Multibyte::utf8($haystack);
        $needle = Multibyte::utf8($needle);

        $needleCount = count($needle);
        if ($needleCount === 0) {
            return false;
        }

        // Last index at which the whole needle still fits.
        $last = count($haystack) - $needleCount;

        for ($position = 0; $position <= $last; $position++) {
            if (array_slice($haystack, $position, $needleCount) !== $needle) {
                continue;
            }
            if ($part) {
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            return Multibyte::ascii(array_slice($haystack, $position));
        }
        return false;
    }

    if (!$php) {
        return strstr($haystack, $needle, $part);
    }
    return strstr($haystack, $needle);
}
||
527 | |||
/**
 * Make a string lowercase
 *
 * ASCII code points go through PHP's native strtolower(). For any other code
 * point the case folding entries returned by _find($char, 'upper') are
 * searched, and the first entry whose 'upper' value equals the code point
 * supplies the first element of its 'lower' list. Code points without such an
 * entry are kept unchanged.
 *
 * @param string $string The string being lowercased.
 * @return string with all alphabetic characters converted to lowercase.
 */
public static function strtolower($string) {
    $lowerCase = array();

    foreach (Multibyte::utf8($string) as $char) {
        if ($char < 128) {
            $lowerCase[] = ord(strtolower(chr($char)));
            continue;
        }

        $lower = $char;
        foreach (static::_find($char, 'upper') as $entry) {
            // 'lower' holds integers, so test for the element itself rather
            // than calling count() on it.
            if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                $lower = $entry['lower'][0];
                break;
            }
        }
        $lowerCase[] = $lower;
    }
    return Multibyte::ascii($lowerCase);
}
||
571 | |||
/**
 * Make a string uppercase
 *
 * ASCII code points go through PHP's native strtoupper(). For any other code
 * point the case folding entries returned by _find($char) are examined:
 *
 * - an entry whose 'lower' list has several code points is applied when the
 *   input at the current position matches that whole list; its code points
 *   are remembered in $replaced so they are not emitted again later;
 * - otherwise an entry whose first 'lower' code point equals the current code
 *   point supplies the 'upper' value.
 *
 * Code points without a match are kept unchanged unless they are listed in
 * $replaced.
 *
 * @param string $string The string being uppercased.
 * @return string with all alphabetic characters converted to uppercase.
 */
public static function strtoupper($string) {
    $utf8Map = Multibyte::utf8($string);

    $length = count($utf8Map);
    $replaced = array();
    $upperCase = array();

    for ($i = 0; $i < $length; $i++) {
        $char = $utf8Map[$i];

        if ($char < 128) {
            $upperCase[] = ord(strtoupper(chr($char)));
            $matched = true;
        } else {
            $matched = false;
            $keys = static::_find($char);
            $keyCount = count($keys);

            if (!empty($keys)) {
                foreach ($keys as $key => $value) {
                    $matched = false;
                    $replace = 0;
                    if ($length > 1 && count($keys[$key]['lower']) > 1) {
                        $j = 0;

                        for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
                            // Look-ahead may run past the end of the input.
                            $nextChar = isset($utf8Map[$i + $ii]) ? $utf8Map[$i + $ii] : null;

                            if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
                                $replace++;
                            }
                        }
                        if ($replace == $count) {
                            $upperCase[] = $keys[$key]['upper'];
                            $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
                            $matched = true;
                            break 1;
                        }
                    } elseif ($length > 1 && $keyCount > 1) {
                        $j = 0;
                        for ($ii = 1; $ii < $keyCount; $ii++) {
                            $nextChar = isset($utf8Map[$i + $ii - 1]) ? $utf8Map[$i + $ii - 1] : null;

                            if (in_array($nextChar, $keys[$ii]['lower'])) {

                                for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
                                    $nextChar = isset($utf8Map[$i + $jj]) ? $utf8Map[$i + $jj] : null;

                                    if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
                                        $replace++;
                                    }
                                }
                                if ($replace == $count) {
                                    $upperCase[] = $keys[$ii]['upper'];
                                    $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
                                    $matched = true;
                                    // Leaves both the $ii loop and the loop over $keys.
                                    break 2;
                                }
                            }
                        }
                    }
                    if ($keys[$key]['lower'][0] == $char) {
                        $upperCase[] = $keys[$key]['upper'];
                        $matched = true;
                        break 1;
                    }
                }
            }
        }
        if ($matched === false && !in_array($char, $replaced, true)) {
            $upperCase[] = $char;
        }
    }
    return Multibyte::ascii($upperCase);
}
||
659 | |||
/**
 * Count the number of substring occurrences
 *
 * Both strings are converted to code points. Occurrences are counted from left
 * to right without overlap: after a match the scan continues behind the
 * matched characters. An empty $needle yields 0.
 *
 * @param string $haystack The string being checked.
 * @param string $needle The string being found.
 * @return int The number of times the $needle substring occurs in the $haystack string.
 */
public static function substrCount($haystack, $needle) {
    $haystack = Multibyte::utf8($haystack);
    $needle = Multibyte::utf8($needle);

    $needleCount = count($needle);
    if ($needleCount === 0) {
        return 0;
    }

    $count = 0;
    $position = 0;
    // Last index at which the whole needle still fits.
    $last = count($haystack) - $needleCount;

    while ($position <= $last) {
        $isMatch = $haystack[$position] === $needle[0] &&
            array_slice($haystack, $position, $needleCount) === $needle;

        if ($isMatch) {
            $count++;
            $position += $needleCount;
        } else {
            $position++;
        }
    }
    return $count;
}
||
694 | |||
/**
 * Get part of string
 *
 * Positions and lengths are counted in characters (code points), not bytes.
 * Negative values follow array_slice() semantics: a negative $start counts
 * from the end of the string and a negative $length stops that many
 * characters before the end.
 *
 * @param string $string The string being checked.
 * @param int $start The first position used in $string.
 * @param int $length The maximum length of the returned string. Null returns everything
 *    from $start to the end.
 * @return string The portion of $string specified by the $start and $length parameters.
 */
public static function substr($string, $start, $length = null) {
    if ($start === 0 && $length === null) {
        return $string;
    }

    $points = Multibyte::utf8($string);
    $length = ($length === null) ? null : (int)$length;

    return Multibyte::ascii(array_slice($points, (int)$start, $length));
}
||
725 | |||
/**
 * Prepare a string for mail transport, using the provided encoding
 *
 * Strings without multibyte characters that are shorter than 75 bytes are
 * returned untouched. Everything else is base64 encoded and wrapped in
 * "=?CHARSET?B?...?=" encoded words, joined by $newline and a space.
 *
 * For UTF-8 the string is cut into chunks before encoding and each cut is
 * moved back so it does not fall on a continuation byte. Other charsets are
 * encoded in one go and the base64 output is split with chunk_split().
 *
 * @param string $string value to encode
 * @param string $charset charset to use for encoding. When empty, the 'App.encoding'
 *    configuration value is used.
 * @param string $newline Newline string.
 * @return string
 */
public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
    if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
        return $string;
    }

    if (empty($charset)) {
        $charset = Configure::read('App.encoding');
    }
    $charset = strtoupper($charset);

    $start = '=?' . $charset . '?B?';
    $end = '?=';
    $spacer = $end . $newline . ' ' . $start;

    // Base64 payload length per encoded word, rounded down to whole quads.
    $length = 75 - strlen($start) - strlen($end);
    $length = $length - ($length % 4);
    if ($charset === 'UTF-8') {
        $parts = array();
        $maxchars = (int)floor(($length * 3) / 4);
        $stringLength = strlen($string);
        while ($stringLength > $maxchars) {
            $i = $maxchars;
            // Step back over continuation bytes (10xxxxxx) to a character start.
            while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
                $i--;
            }
            if ($i === 0) {
                // Malformed input without a usable boundary: cut at the limit
                // so every pass consumes bytes and the loop terminates.
                $i = $maxchars;
            }
            $parts[] = base64_encode(substr($string, 0, $i));
            $string = substr($string, $i);
            $stringLength = strlen($string);
        }
        $parts[] = base64_encode($string);
        $string = implode($spacer, $parts);
    } else {
        $string = chunk_split(base64_encode($string), $length, $spacer);
        // chunk_split() appends the separator after the last chunk as well.
        $string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
    }
    return $start . $string . $end;
}
||
773 | |||
/**
 * Return the Code points range for Unicode characters
 *
 * Looks up the name of the case folding table that covers $decimal. The
 * result, including false for code points outside every listed block, is
 * stored in static::$_codeRange under $decimal.
 *
 * @param int $decimal Decimal value.
 * @return string|bool Table name such as '0080_00ff', or false when no table covers $decimal.
 */
protected static function _codepoint($decimal) {
    // array(first code point, first code point after the block, table name)
    $ranges = array(
        array(128, 256, '0080_00ff'), // Latin-1 Supplement
        array(256, 384, '0100_017f'), // Latin Extended-A
        array(384, 592, '0180_024F'), // Latin Extended-B
        array(592, 688, '0250_02af'), // IPA Extensions
        array(880, 1024, '0370_03ff'), // Greek and Coptic
        array(1024, 1280, '0400_04ff'), // Cyrillic
        array(1280, 1328, '0500_052f'), // Cyrillic Supplement
        array(1328, 1424, '0530_058f'), // Armenian
        array(7680, 7936, '1e00_1eff'), // Latin Extended Additional
        array(7936, 8192, '1f00_1fff'), // Greek Extended
        array(8448, 8528, '2100_214f'), // Letterlike Symbols
        array(8528, 8592, '2150_218f'), // Number Forms
        array(9312, 9472, '2460_24ff'), // Enclosed Alphanumerics
        array(11264, 11360, '2c00_2c5f'), // Glagolitic
        array(11360, 11392, '2c60_2c7f'), // Latin Extended-C
        array(11392, 11520, '2c80_2cff'), // Coptic
        array(65280, 65520, 'ff00_ffef'), // Halfwidth and Fullwidth Forms
    );

    $return = false;
    foreach ($ranges as $range) {
        // Both bounds are checked so code points in the gaps between blocks
        // are not attributed to the next listed block.
        if ($decimal >= $range[0] && $decimal < $range[1]) {
            $return = $range[2];
            break;
        }
    }
    static::$_codeRange[$decimal] = $return;
    return $return;
}
||
821 | |||
/**
 * Find the related code folding values for $char
 *
 * The first time a code point is seen its table name is resolved through
 * _codepoint(). The matching 'unicode/casefolding/<range>' configuration is
 * loaded through Configure with the '_cake_core_' reader and kept in
 * static::$_caseFold, so each table is loaded once. The table is then scanned
 * for entries belonging to $char.
 *
 * @param int $char decimal value of character
 * @param string $type Type 'lower' or 'upper'. Defaults to 'lower'. With 'lower' an entry
 *    matches when the first element of its 'lower' list is $char; with 'upper' when its
 *    'upper' value is $char.
 * @return array Matching case folding entries, empty when none apply.
 */
protected static function _find($char, $type = 'lower') {
    $found = array();
    if (!isset(static::$_codeRange[$char])) {
        $range = static::_codepoint($char);
        if ($range === false) {
            return array();
        }
        // Skip the reload when another code point of this range already
        // brought the table in.
        if (!isset(static::$_caseFold[$range])) {
            if (!Configure::configured('_cake_core_')) {
                App::uses('PhpReader', 'Configure');
                Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
            }
            Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
            static::$_caseFold[$range] = Configure::read($range);
            Configure::delete($range);
        }
    }

    if (!static::$_codeRange[$char]) {
        return array();
    }
    static::$_table = static::$_codeRange[$char];

    // Configure::read() may not have produced a table; treat that as "no entries".
    if (empty(static::$_caseFold[static::$_table]) || !is_array(static::$_caseFold[static::$_table])) {
        return array();
    }

    foreach (static::$_caseFold[static::$_table] as $entry) {
        if ($type === 'lower' && $entry[$type][0] === $char) {
            $found[] = $entry;
        } elseif ($type === 'upper' && $entry[$type] === $char) {
            $found[] = $entry;
        }
    }
    return $found;
}
||
860 | |||
/**
 * Check the $string for multibyte characters
 *
 * Reports whether any byte of $string lies outside the 7-bit ASCII range
 * (0-127), which is the same boundary utf8() uses to tell single byte
 * characters from multibyte sequences.
 *
 * @param string $string Value to test.
 * @return bool True when at least one byte is 128 or higher.
 */
public static function checkMultibyte($string) {
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        if (ord($string[$i]) > 127) {
            return true;
        }
    }
    return false;
}
||
878 | |||
879 | } |