summary refs log tree commit diff
diff options
context:
space:
mode:
author Greg Roach <fisharebest@gmail.com> 2017-01-05 11:04:28 +0000
committer Greg Roach <fisharebest@gmail.com> 2017-01-05 11:18:04 +0000
commit 991b93ddf82ec1af763df7d5e82b5c7c43ed1347 (patch)
tree 439b9ea6635f41c40c277e101c426171072d8b4a
parent 2032bcdd12e743ce4811757ac21942de54105026 (diff)
download webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.tar.gz
webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.tar.bz2
webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.zip
#1175 - use the php-intl collator class, where it is installed
-rw-r--r-- app/I18N.php 186
1 files changed, 64 insertions, 122 deletions
diff --git a/app/I18N.php b/app/I18N.php
index 0dc59ebea6..51e8d7d120 100644
--- a/app/I18N.php
+++ b/app/I18N.php
@@ -15,6 +15,7 @@
*/
namespace Fisharebest\Webtrees;
+use Collator;
use Exception;
use Fisharebest\ExtCalendar\ArabicCalendar;
use Fisharebest\ExtCalendar\CalendarInterface;
@@ -37,25 +38,21 @@ class I18N {
/** @var Translator An object that performs translation*/
private static $translator;
+ /** @var Collator From the php-intl library */
+ private static $collator;
+
// Digits are always rendered LTR, even in RTL text.
const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
- // Reversable character conversions from the UNICODE 5.1 database.
- // It excludes ambiguous (turkish dotless i) and mixed-case (Dz) characters.
- // The characters should be arranged in default unicode-collation order.
- const ALPHABET_LOWER = 'aàáâãäåāăąǎǟǡǻȁȃȧḁạảấầẩẫậắằẳẵặⓐaæǣǽbḃḅḇⓑbƀɓƃcçćĉċčḉⅽⓒcƈdďḋḍḏḑḓⅾⓓddždzđɖɗƌðeèéêëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệⓔeǝəɛfḟⓕfƒgĝğġģǧǵḡⓖgǥɠɣƣhĥȟḣḥḧḩḫⓗhƕħiìíîïĩīĭįǐȉȋḭḯỉịⅰⓘiⅱⅲijⅳⅸɨɩjĵⓙjkķǩḱḳḵⓚkƙlĺļľḷḹḻḽⅼⓛlŀljłƚmḿṁṃⅿⓜmnñńņňǹṅṇṉṋⓝnnjɲƞŋoòóôõöōŏőơǒǫǭȍȏȫȭȯȱṍṏṑṓọỏốồổỗộớờởỡợⓞoœøǿɔɵȣpṕṗⓟpƥqⓠqrŕŗřȑȓṙṛṝṟⓡrʀsśŝşšșṡṣṥṧṩⓢsʃtţťțṫṭṯṱⓣtŧƭʈuùúûüũūŭůűųưǔǖǘǚǜȕȗṳṵṷṹṻụủứừửữựⓤuʉɯʊvṽṿⅴⓥvⅵⅶⅷʋʌwŵẁẃẅẇẉⓦwxẋẍⅹⓧxⅺⅻyýÿŷȳẏỳỵỷỹⓨyƴzźżžẑẓẕⓩzƶȥǯʒƹȝþƿƨƽƅάαἀἁἂἃἄἅἆἇὰάᾀᾁᾂᾃᾄᾅᾆᾇᾰᾱᾳβγδέεἐἑἒἓἔἕὲέϝϛζήηἠἡἢἣἤἥἦἧὴήᾐᾑᾒᾓᾔᾕᾖᾗῃθϊἰἱἲἳἴἵἶἷὶίῐῑκϗλμνξοόὀὁὂὃὄὅὸόπϟϙρῥσϲτυϋύὑὓὕὗὺύῠῡφχψωώὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῳϡϸϻϣϥϧϩϫϭϯаӑӓәӛӕбвгґғҕдԁђԃѓҙеѐёӗєжӂӝҗзԅӟѕӡԇиѝӣҋӥіїйјкқӄҡҟҝлӆљԉмӎнӊңӈҥњԋоӧөӫпҧҁрҏсԍҫтԏҭћќуӯўӱӳүұѹфхҳһѡѿѽѻцҵчӵҷӌҹҽҿџшщъыӹьҍѣэӭюяѥѧѫѩѭѯѱѳѵѷҩաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆȼɂɇɉɋɍɏͱͳͷͻͼͽӏӷӻӽӿԑԓԕԗԙԛԝԟԡԣԥᵹᵽỻỽỿⅎↄⰰⰱⰲⰳⰴⰵⰶⰷⰸⰹⰺⰻⰼⰽⰾⰿⱀⱁⱂⱃⱄⱅⱆⱇⱈⱉⱊⱋⱌⱍⱎⱏⱐⱑⱒⱓⱔⱕⱖⱗⱘⱙⱚⱛⱜⱝⱞⱡⱨⱪⱬⱳⱶⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳬⳮⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥꙁꙃꙅꙇꙉꙋꙍꙏꙑꙓꙕꙗꙙꙛꙝꙟꙣꙥꙧꙩꙫꙭꚁꚃꚅꚇꚉꚋꚍꚏꚑꚓꚕꚗꜣꜥꜧꜩꜫꜭꜯꜳꜵꜷꜹꜻꜽꜿꝁꝃꝅꝇꝉꝋꝍꝏꝑꝓꝕꝗꝙꝛꝝꝟꝡꝣꝥꝧꝩꝫꝭꝯꝺꝼꝿꞁꞃꞅꞇꞌ';
- const ALPHABET_UPPER = 'AÀÁÂÃÄÅĀĂĄǍǞǠǺȀȂȦḀẠẢẤẦẨẪẬẮẰẲẴẶⒶAÆǢǼBḂḄḆⒷBɃƁƂCÇĆĈĊČḈⅭⒸCƇDĎḊḌḎḐḒⅮⒹDDŽDZĐƉƊƋÐEÈÉÊËĒĔĖĘĚȄȆȨḔḖḘḚḜẸẺẼẾỀỂỄỆⒺEƎƏƐFḞⒻFƑGĜĞĠĢǦǴḠⒼGǤƓƔƢHĤȞḢḤḦḨḪⒽHǶĦIÌÍÎÏĨĪĬĮǏȈȊḬḮỈỊⅠⒾIⅡⅢIJⅣⅨƗƖJĴⒿJKĶǨḰḲḴⓀKƘLĹĻĽḶḸḺḼⅬⓁLĿLJŁȽMḾṀṂⅯⓂMNÑŃŅŇǸṄṆṈṊⓃNNJƝȠŊOÒÓÔÕÖŌŎŐƠǑǪǬȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢⓄOŒØǾƆƟȢPṔṖⓅPƤQⓆQRŔŖŘȐȒṘṚṜṞⓇRƦSŚŜŞŠȘṠṢṤṦṨⓈSƩTŢŤȚṪṬṮṰⓉTŦƬƮUÙÚÛÜŨŪŬŮŰŲƯǓǕǗǙǛȔȖṲṴṶṸṺỤỦỨỪỬỮỰⓊUɄƜƱVṼṾⅤⓋVⅥⅦⅧƲɅWŴẀẂẄẆẈⓌWXẊẌⅩⓍXⅪⅫYÝŸŶȲẎỲỴỶỸⓎYƳZŹŻŽẐẒẔⓏZƵȤǮƷƸȜÞǷƧƼƄΆΑἈἉἊἋἌἍἎἏᾺΆᾈᾉᾊᾋᾌᾍᾎᾏᾸᾹᾼΒΓΔΈΕἘἙἚἛἜἝῈΈϜϚΖΉΗἨἩἪἫἬἭἮἯῊΉᾘᾙᾚᾛᾜᾝᾞᾟῌΘΪἸἹἺἻἼἽἾἿῚΊῘῙΚϏΛΜΝΞΟΌὈὉὊὋὌὍῸΌΠϞϘΡῬΣϹΤΥΫΎὙὛὝὟῪΎῨῩΦΧΨΩΏὨὩὪὫὬὭὮὯῺΏᾨᾩᾪᾫᾬᾭᾮᾯῼϠϷϺϢϤϦϨϪϬϮАӐӒӘӚӔБВГҐҒҔДԀЂԂЃҘЕЀЁӖЄЖӁӜҖЗԄӞЅӠԆИЍӢҊӤІЇЙЈКҚӃҠҞҜЛӅЉԈМӍНӉҢӇҤЊԊОӦӨӪПҦҀРҎСԌҪТԎҬЋЌУӮЎӰӲҮҰѸФХҲҺѠѾѼѺЦҴЧӴҶӋҸҼҾЏШЩЪЫӸЬҌѢЭӬЮЯѤѦѪѨѬѮѰѲѴѶҨԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖȻɁɆɈɊɌɎͰͲͶϽϾϿӀӶӺӼӾԐԒԔԖԘԚԜԞԠԢԤꝽⱣỺỼỾℲↃⰀⰁⰂⰃⰄⰅⰆⰇⰈⰉⰊⰋⰌⰍⰎⰏⰐⰑⰒⰓⰔⰕⰖⰗⰘⰙⰚⰛⰜⰝⰞⰟⰠⰡⰢⰣⰤⰥⰦⰧⰨⰩⰪⰫⰬⰭⰮⱠⱧⱩⱫⱲⱵⲀⲂⲄⲆⲈⲊⲌⲎⲐⲒⲔⲖⲘⲚⲜⲞⲠⲢⲤⲦⲨⲪⲬⲮⲰⲲⲴⲶⲸⲺⲼⲾⳀⳂⳄⳆⳈⳊⳌⳎⳐⳒⳔⳖⳘⳚⳜⳞⳠⳢⳫⳭႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅꙀꙂꙄꙆꙈꙊꙌꙎꙐꙒꙔꙖꙘꙚꙜꙞꙢꙤꙦꙨꙪꙬꚀꚂꚄꚆꚈꚊꚌꚎꚐꚒꚔꚖꜢꜤꜦꜨꜪꜬꜮꜲꜴꜶꜸꜺꜼꜾꝀꝂꝄꝆꝈꝊꝌꝎꝐꝒꝔꝖꝘꝚꝜꝞꝠꝢꝤꝦꝨꝪꝬꝮꝹꝻꝾꞀꞂꞄꞆꞋ';
-
- /** @var string Alphabet, in lower case, for the current locale. */
- private static $alphabet_lower = 'abcdefghijklmnopqrstuvwxyz';
+ // These locales need special handling for the dotless letter I.
+ const DOTLESS_I_LOCALES = ['az', 'tr'];
+ const DOTLESS_I_TOLOWER = ['I' => 'ı', 'İ' => 'i'];
+ const DOTLESS_I_TOUPPER = ['ı' => 'I', 'i' => 'İ'];
- /** @var string Alphabet, in upper case, for the current locale. */
- private static $alphabet_upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
-
- /** @var int[][] Character ranges used by each script. */
- private static $scripts = [
- ['Latn', 0x0041, 0x005A], // a-z
- ['Latn', 0x0061, 0x007A], // A-Z
+ // The ranges of characters used by each script.
+ const SCRIPT_CHARACTER_RANGES = [
+ ['Latn', 0x0041, 0x005A],
+ ['Latn', 0x0061, 0x007A],
['Latn', 0x0100, 0x02AF],
['Grek', 0x0370, 0x03FF],
['Cyrl', 0x0400, 0x052F],
@@ -75,26 +72,33 @@ class I18N {
['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
];
- /** @var string[] Characters that are displayed in mirror form in RTL text. */
- private static $mirror_characters = [
- '(' => ')',
- ')' => '(',
- '[' => ']',
- ']' => '[',
- '{' => '}',
- '}' => '{',
- '<' => '>',
- '>' => '<',
- '‹' => '›',
- '›' => '‹',
- '«' => '»',
- '»' => '«',
- '﴾' => '﴿',
- '﴿' => '﴾',
- '“' => '”',
- '”' => '“',
- '‘' => '’',
- '’' => '‘',
+ // Characters that are displayed in mirror form in RTL text.
+ const MIRROR_CHARACTERS = [
+ '(' => ')',
+ ')' => '(',
+ '[' => ']',
+ ']' => '[',
+ '{' => '}',
+ '}' => '{',
+ '<' => '>',
+ '>' => '<',
+ '‹' => '›',
+ '›' => '‹',
+ '«' => '»',
+ '»' => '«',
+ '﴾' => '﴿',
+ '﴿' => '﴾',
+ '“' => '”',
+ '”' => '“',
+ '‘' => '’',
+ '’' => '‘',
+ ];
+
+ // Default list of locales to show in the menu.
+ const DEFAULT_LOCALES = [
+ 'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es',
+ 'et', 'fi', 'fr', 'he', 'hr', 'hu', 'is', 'it', 'ka', 'lt', 'mr', 'nb',
+ 'nl', 'nn', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'uk', 'vi', 'zh-Hans',
];
/** @var string Punctuation used to separate list items, typically a comma */
@@ -108,14 +112,10 @@ class I18N {
public static function activeLocales() {
$code_list = Site::getPreference('LANGUAGES');
- if ($code_list) {
- $codes = explode(',', $code_list);
+ if (empty($code_list)) {
+ $codes = self::DEFAULT_LOCALES;
} else {
- $codes = [
- 'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es',
- 'et', 'fi', 'fr', 'he', 'hr', 'hu', 'is', 'it', 'ka', 'lt', 'mr', 'nb',
- 'nl', 'nn', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'uk', 'vi', 'zh-Hans',
- ];
+ $codes = explode(',', $code_list);
}
$locales = [];
@@ -435,13 +435,17 @@ class I18N {
// Create a translator
self::$translator = new Translator($translations, self::$locale->pluralRule());
- // Alphabetic sorting sequence (upper-case letters), used by webtrees to sort strings
- list(, self::$alphabet_upper) = explode('=', self::$translator->translate('ALPHABET_upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ'));
- // Alphabetic sorting sequence (lower-case letters), used by webtrees to sort strings
- list(, self::$alphabet_lower) = explode('=', self::$translator->translate('ALPHABET_lower=abcdefghijklmnopqrstuvwxyz'));
-
self::$list_separator = /* I18N: This punctuation is used to separate lists of items */ self::translate(', ');
+ // Create a collator
+ try {
+ self::$collator = new Collator(self::$locale->code());
+ // Ignore upper/lower case differences
+ self::$collator->setStrength(Collator::SECONDARY);
+ } catch (Exception $ex) {
+ // PHP-INTL is not installed? We'll use a fallback later.
+ }
+
return self::$locale->languageTag();
}
@@ -561,7 +565,7 @@ class I18N {
}
// Mirrored characters
- $text = strtr($text, self::$mirror_characters);
+ $text = strtr($text, self::MIRROR_CHARACTERS);
$reversed = '';
$digits = '';
@@ -602,9 +606,7 @@ class I18N {
}
/**
- * UTF8 version of PHP::strcasecmp()
- *
- * Perform a case-insensitive comparison of two strings, using rules from the current locale
+ * Perform a case-insensitive comparison of two strings.
*
* @param string $string1
* @param string $string2
@@ -612,98 +614,38 @@ class I18N {
* @return int
*/
public static function strcasecmp($string1, $string2) {
- $strpos1 = 0;
- $strpos2 = 0;
- $strlen1 = strlen($string1);
- $strlen2 = strlen($string2);
- while ($strpos1 < $strlen1 && $strpos2 < $strlen2) {
- $byte1 = ord($string1[$strpos1]);
- $byte2 = ord($string2[$strpos2]);
- if (($byte1 & 0xE0) === 0xC0) {
- $chr1 = $string1[$strpos1++] . $string1[$strpos1++];
- } elseif (($byte1 & 0xF0) === 0xE0) {
- $chr1 = $string1[$strpos1++] . $string1[$strpos1++] . $string1[$strpos1++];
- } else {
- $chr1 = $string1[$strpos1++];
- }
- if (($byte2 & 0xE0) === 0xC0) {
- $chr2 = $string2[$strpos2++] . $string2[$strpos2++];
- } elseif (($byte2 & 0xF0) === 0xE0) {
- $chr2 = $string2[$strpos2++] . $string2[$strpos2++] . $string2[$strpos2++];
- } else {
- $chr2 = $string2[$strpos2++];
- }
- if ($chr1 === $chr2) {
- continue;
- }
- // Try the local alphabet first
- $offset1 = strpos(self::$alphabet_lower, $chr1);
- if ($offset1 === false) {
- $offset1 = strpos(self::$alphabet_upper, $chr1);
- }
- $offset2 = strpos(self::$alphabet_lower, $chr2);
- if ($offset2 === false) {
- $offset2 = strpos(self::$alphabet_upper, $chr2);
- }
- if ($offset1 !== false && $offset2 !== false) {
- if ($offset1 === $offset2) {
- continue;
- } else {
- return $offset1 - $offset2;
- }
- }
- // Try the global alphabet next
- $offset1 = strpos(self::ALPHABET_LOWER, $chr1);
- if ($offset1 === false) {
- $offset1 = strpos(self::ALPHABET_UPPER, $chr1);
- }
- $offset2 = strpos(self::ALPHABET_LOWER, $chr2);
- if ($offset2 === false) {
- $offset2 = strpos(self::ALPHABET_UPPER, $chr2);
- }
- if ($offset1 !== false && $offset2 !== false) {
- if ($offset1 === $offset2) {
- continue;
- } else {
- return $offset1 - $offset2;
- }
- }
- // Just compare by unicode order
- return strcmp($chr1, $chr2);
+ if (self::$collator instanceof Collator) {
+ return self::$collator->compare($string1, $string2);
+ } else {
+ return strcmp(self::strtolower($string1), self::strtolower($string2));
}
- // Shortest string comes first.
- return ($strlen1 - $strpos1) - ($strlen2 - $strpos2);
}
/**
- * UTF8 version of PHP::strtolower()
- *
- * Convert a string to lower case, using the rules from the current locale
+ * Convert a string to lower case.
*
* @param string $string
*
* @return string
*/
public static function strtolower($string) {
- if (self::$locale->language()->code() === 'tr' || self::$locale->language()->code() === 'az') {
- $string = strtr($string, ['I' => 'ı', 'İ' => 'i']);
+ if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
+ $string = strtr($string, self::DOTLESS_I_TOLOWER);
}
return mb_strtolower($string);
}
/**
- * UTF8 version of PHP::strtoupper()
- *
- * Convert a string to upper case, using the rules from the current locale
+ * Convert a string to upper case.
*
* @param string $string
*
* @return string
*/
public static function strtoupper($string) {
- if (self::$locale->language()->code() === 'tr' || self::$locale->language()->code() === 'az') {
- $string = strtr($string, ['ı' => 'I', 'i' => 'İ']);
+ if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
+ $string = strtr($string, self::DOTLESS_I_TOUPPER);
}
return mb_strtoupper($string);
@@ -762,7 +704,7 @@ class I18N {
return 'Latn';
}
- foreach (self::$scripts as $range) {
+ foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
if ($code_point >= $range[1] && $code_point <= $range[2]) {
return $range[0];
}