diff options
| author | Greg Roach <greg@subaqua.co.uk> | 2021-11-05 18:45:20 +0000 |
|---|---|---|
| committer | Greg Roach <greg@subaqua.co.uk> | 2021-11-05 18:47:37 +0000 |
| commit | d5e02c3af059cb8388b4ccf503317f530ca159ab (patch) | |
| tree | 4e9360f0ac37d1c3b2bdb660261e5543e8fa0a9d /app/Report | |
| parent | 062377ee959c5bca8672cc6d68c83a87e2c6abfc (diff) | |
| download | webtrees-d5e02c3af059cb8388b4ccf503317f530ca159ab.tar.gz webtrees-d5e02c3af059cb8388b4ccf503317f530ca159ab.tar.bz2 webtrees-d5e02c3af059cb8388b4ccf503317f530ca159ab.zip | |
Move class to correct namespace
Diffstat (limited to 'app/Report')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | app/Report/ReportPdfCell.php | 2 |
| -rw-r--r-- | app/Report/ReportPdfText.php | 2 |
| -rw-r--r-- | app/Report/RightToLeftSupport.php | 1145 |
3 files changed, 1145 insertions, 4 deletions
diff --git a/app/Report/ReportPdfCell.php b/app/Report/ReportPdfCell.php index 417dd12fc2..24ea79cbe0 100644 --- a/app/Report/ReportPdfCell.php +++ b/app/Report/ReportPdfCell.php @@ -19,8 +19,6 @@ declare(strict_types=1); namespace Fisharebest\Webtrees\Report; -use Fisharebest\Webtrees\Reports\RightToLeftSupport; - use function hexdec; use function is_array; use function preg_match; diff --git a/app/Report/ReportPdfText.php b/app/Report/ReportPdfText.php index 6c7188d2d3..244a584b5f 100644 --- a/app/Report/ReportPdfText.php +++ b/app/Report/ReportPdfText.php @@ -19,8 +19,6 @@ declare(strict_types=1); namespace Fisharebest\Webtrees\Report; -use Fisharebest\Webtrees\Reports\RightToLeftSupport; - use function count; use function explode; use function hexdec; diff --git a/app/Report/RightToLeftSupport.php b/app/Report/RightToLeftSupport.php new file mode 100644 index 0000000000..e58d4cecaa --- /dev/null +++ b/app/Report/RightToLeftSupport.php @@ -0,0 +1,1145 @@ +<?php + +/** + * webtrees: online genealogy + * Copyright (C) 2021 webtrees development team + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +declare(strict_types=1); + +namespace Fisharebest\Webtrees\Report; + +use Fisharebest\Webtrees\I18N; + +use function str_contains; + +/** + * RTL Functions for use in the PDF reports + */ +class RightToLeftSupport +{ + private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark: zero-width character with LTR directionality) + private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark: zero-width character with RTL directionality) + private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode) + private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode) + private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text) + private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text) + private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE) + + private const OPEN_PARENTHESES = '([{'; + + private const CLOSE_PARENTHESES = ')]}'; + + private const NUMBERS = '0123456789'; + + private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings + + private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings + + private const PUNCTUATION = ',.:;?!'; + + // Markup + private const START_LTR = '<LTR>'; + private const END_LTR = '</LTR>'; + private const START_RTL = '<RTL>'; + private const END_RTL = '</RTL>'; + private const LENGTH_START = 5; + private const LENGTH_END = 6; + + /** @var string Were we previously processing LTR or RTL. */ + private static $previousState; + + /** @var string Are we currently processing LTR or RTL. */ + private static $currentState; + + /** @var string Text waiting to be processed. 
*/ + private static $waitingText; + + /** @var int Offset into the text. */ + private static $posSpanStart; + + /** + * This function strips ‎ and ‏ from the input string. It should be used for all + * text that has been passed through the PrintReady() function before that text is stored + * in the database. The database should NEVER contain these characters. + * + * @param string $inputText The string from which the ‎ and ‏ characters should be stripped + * + * @return string The input string, with ‎ and ‏ stripped + */ + private static function stripLrmRlm(string $inputText): string + { + return str_replace([ + self::UTF8_LRM, + self::UTF8_RLM, + self::UTF8_LRO, + self::UTF8_RLO, + self::UTF8_LRE, + self::UTF8_RLE, + self::UTF8_PDF, + '‎', + '‏', + '&LRM;', + '&RLM;', + ], '', $inputText); + } + + /** + * This function encapsulates all texts in the input with <span dir='xxx'> and </span> + * according to the directionality specified. + * + * @param string $inputText Raw input + * + * @return string The string with all texts encapsulated as required + */ + public static function spanLtrRtl(string $inputText): string + { + if ($inputText === '') { + // Nothing to do + return ''; + } + + $workingText = str_replace("\n", '<br>', $inputText); + $workingText = str_replace([ + '<span class="starredname"><br>', + '<span<br>class="starredname">', + ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks + $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes + + self::$previousState = ''; + self::$currentState = strtoupper(I18N::direction()); + $numberState = false; // Set when we're inside a numeric string + $result = ''; + self::$waitingText = ''; + $openParDirection = []; + + self::beginCurrentSpan($result); + + while ($workingText !== '') { + $charArray = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character + $currentLetter = $charArray['letter']; + $currentLen = 
$charArray['length']; + + $openParIndex = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this? + $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this? + + switch ($currentLetter) { + case '<': + // Assume this '<' starts an HTML element + $endPos = strpos($workingText, '>'); // look for the terminating '>' + if ($endPos === false) { + $endPos = 0; + } + $currentLen += $endPos; + $element = substr($workingText, 0, $currentLen); + $temp = strtolower(substr($element, 0, 3)); + if (strlen($element) < 7 && $temp === '<br') { + if ($numberState) { + $numberState = false; + if (self::$currentState === 'RTL') { + self::$waitingText .= self::UTF8_PDF; + } + } + self::breakCurrentSpan($result); + } elseif (self::$waitingText === '') { + $result .= $element; + } else { + self::$waitingText .= $element; + } + $workingText = substr($workingText, $currentLen); + break; + case '&': + // Assume this '&' starts an HTML entity + $endPos = strpos($workingText, ';'); // look for the terminating ';' + if ($endPos === false) { + $endPos = 0; + } + $currentLen += $endPos; + $entity = substr($workingText, 0, $currentLen); + if (strtolower($entity) === ' ') { + $entity = ' '; // Ensure consistent case for this entity + } + if (self::$waitingText === '') { + $result .= $entity; + } else { + self::$waitingText .= $entity; + } + $workingText = substr($workingText, $currentLen); + break; + case '{': + if (substr($workingText, 1, 1) === '{') { + // Assume this '{{' starts a TCPDF directive + $endPos = strpos($workingText, '}}'); // look for the terminating '}}' + if ($endPos === false) { + $endPos = 0; + } + $currentLen = $endPos + 2; + $directive = substr($workingText, 0, $currentLen); + $workingText = substr($workingText, $currentLen); + $result .= self::$waitingText . 
$directive; + self::$waitingText = ''; + break; + } + // no break + default: + // Look for strings of numbers with optional leading or trailing + or - + // and with optional embedded numeric punctuation + if ($numberState) { + // If we're inside a numeric string, look for reasons to end it + $offset = 0; // Be sure to look at the current character first + $charArray = self::getChar($workingText . "\n", $offset); + if (!str_contains(self::NUMBERS, $charArray['letter'])) { + // This is not a digit. Is it numeric punctuation? + if (substr($workingText . "\n", $offset, 6) === ' ') { + $offset += 6; // This could be numeric punctuation + } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) { + $offset += $charArray['length']; // This could be numeric punctuation + } + // If the next character is a digit, the current character is numeric punctuation + $charArray = self::getChar($workingText . "\n", $offset); + if (!str_contains(self::NUMBERS, $charArray['letter'])) { + // This is not a digit. End the run of digits and punctuation. + $numberState = false; + if (self::$currentState === 'RTL') { + if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) { + $currentLetter = self::UTF8_PDF . $currentLetter; + } else { + $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run + } + } + } + } + } else { + // If we're outside a numeric string, look for reasons to start it + if (str_contains(self::NUMBER_PREFIX, $currentLetter)) { + // This might be a number lead-in + $offset = $currentLen; + $nextChar = substr($workingText . "\n", $offset, 1); + if (str_contains(self::NUMBERS, $nextChar)) { + $numberState = true; // We found a digit: the lead-in is therefore numeric + if (self::$currentState === 'RTL') { + $currentLetter = self::UTF8_LRE . 
$currentLetter; + } + } + } elseif (str_contains(self::NUMBERS, $currentLetter)) { + $numberState = true; // The current letter is a digit + if (self::$currentState === 'RTL') { + $currentLetter = self::UTF8_LRE . $currentLetter; + } + } + } + + // Determine the directionality of the current UTF-8 character + $newState = self::$currentState; + + while (true) { + if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') { + if (self::$currentState === '') { + $newState = 'RTL'; + break; + } + + if (self::$currentState === 'RTL') { + break; + } + // Switch to RTL only if this isn't a solitary RTL letter + $tempText = substr($workingText, $currentLen); + while ($tempText !== '') { + $nextCharArray = self::getChar($tempText, 0); + $nextLetter = $nextCharArray['letter']; + $nextLen = $nextCharArray['length']; + $tempText = substr($tempText, $nextLen); + + if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') { + $newState = 'RTL'; + break 2; + } + + if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) { + $newState = 'RTL'; + break 2; + } + + if ($nextLetter === ' ') { + break; + } + $nextLetter .= substr($tempText . "\n", 0, 5); + if ($nextLetter === ' ') { + break; + } + } + // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality + $currentLetter = self::UTF8_LRO . $currentLetter . 
self::UTF8_PDF; + $newState = 'LTR'; + break; + } + if ($currentLen !== 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') { + // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR + $newState = 'LTR'; + break; + } + if ($closeParIndex !== false) { + // This closing parenthesis has to inherit the matching opening parenthesis' directionality + if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') { + $newState = $openParDirection[$closeParIndex]; + } + $openParDirection[$closeParIndex] = ''; + break; + } + if ($openParIndex !== false) { + // Opening parentheses always inherit the following directionality + self::$waitingText .= $currentLetter; + $workingText = substr($workingText, $currentLen); + while (true) { + if ($workingText === '') { + break; + } + if (substr($workingText, 0, 1) === ' ') { + // Spaces following this left parenthesis inherit the following directionality too + self::$waitingText .= ' '; + $workingText = substr($workingText, 1); + continue; + } + if (substr($workingText, 0, 6) === ' ') { + // Spaces following this left parenthesis inherit the following directionality too + self::$waitingText .= ' '; + $workingText = substr($workingText, 6); + continue; + } + break; + } + $openParDirection[$openParIndex] = '?'; + break 2; // double break because we're waiting for more information + } + + // We have a digit or a "normal" special character. + // + // When this character is not at the start of the input string, it inherits the preceding directionality; + // at the start of the input string, it assumes the following directionality. + // + // Exceptions to this rule will be handled later during final clean-up. 
+ // + self::$waitingText .= $currentLetter; + $workingText = substr($workingText, $currentLen); + if (self::$currentState !== '') { + $result .= self::$waitingText; + self::$waitingText = ''; + } + break 2; // double break because we're waiting for more information + } + if ($newState !== self::$currentState) { + // A direction change has occurred + self::finishCurrentSpan($result); + self::$previousState = self::$currentState; + self::$currentState = $newState; + self::beginCurrentSpan($result); + } + self::$waitingText .= $currentLetter; + $workingText = substr($workingText, $currentLen); + $result .= self::$waitingText; + self::$waitingText = ''; + + foreach ($openParDirection as $index => $value) { + // Since we now know the proper direction, remember it for all waiting opening parentheses + if ($value === '?') { + $openParDirection[$index] = self::$currentState; + } + } + + break; + } + } + + // We're done. Finish last <span> if necessary + if ($numberState) { + if (self::$waitingText === '') { + if (self::$currentState === 'RTL') { + $result .= self::UTF8_PDF; + } + } else { + if (self::$currentState === 'RTL') { + self::$waitingText .= self::UTF8_PDF; + } + } + } + self::finishCurrentSpan($result, true); + + // Get rid of any waiting text + if (self::$waitingText !== '') { + if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') { + $result .= self::START_RTL; + $result .= self::$waitingText; + $result .= self::END_RTL; + } else { + $result .= self::START_LTR; + $result .= self::$waitingText; + $result .= self::END_LTR; + } + self::$waitingText = ''; + } + + // Lastly, do some more cleanups + + // Move leading RTL numeric strings to following LTR text + // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text) + while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) { + $spanEnd = strpos($result, self::END_RTL . 
self::START_LTR); + if ($spanEnd === false) { + break; + } + $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3)); + if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') { + break; + } + $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END); + break; + } + + // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span + if (I18N::direction() === 'rtl') { + $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result); + } + + // Trim trailing blanks preceding <br> in LTR text + while (self::$previousState !== 'RTL') { + if (str_contains($result, ' <LTRbr>')) { + $result = str_replace(' <LTRbr>', '<LTRbr>', $result); + continue; + } + if (str_contains($result, ' <LTRbr>')) { + $result = str_replace(' <LTRbr>', '<LTRbr>', $result); + continue; + } + if (str_contains($result, ' <br>')) { + $result = str_replace(' <br>', '<br>', $result); + continue; + } + if (str_contains($result, ' <br>')) { + $result = str_replace(' <br>', '<br>', $result); + continue; + } + break; // Neither space nor : we're done + } + + // Trim trailing blanks preceding <br> in RTL text + while (true) { + if (str_contains($result, ' <RTLbr>')) { + $result = str_replace(' <RTLbr>', '<RTLbr>', $result); + continue; + } + if (str_contains($result, ' <RTLbr>')) { + $result = str_replace(' <RTLbr>', '<RTLbr>', $result); + continue; + } + break; // Neither space nor : we're done + } + + // Convert '<LTRbr>' and '<RTLbr' + $result = str_replace([ + '<LTRbr>', + '<RTLbr>', + ], [ + self::END_LTR . '<br>' . self::START_LTR, + self::END_RTL . '<br>' . self::START_RTL, + ], $result); + + // Include leading indeterminate directional text in whatever follows + if (substr($result . 
"\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && substr($result . "\n", 0, 4) !== '<br>') { + $leadingText = ''; + while (true) { + if ($result === '') { + $result = $leadingText; + break; + } + if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) { + $leadingText .= substr($result, 0, 1); + $result = substr($result, 1); + continue; + } + $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START); + break; + } + } + + // Include solitary "-" and "+" in surrounding RTL text + $result = str_replace([ + self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL, + self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL, + ], [ + '-', + '+', + ], $result); + + //$result = strtr($result, [ + // self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL => '-', + // self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL => '+', + //]); + + // Remove empty spans + $result = str_replace([ + self::START_LTR . self::END_LTR, + self::START_RTL . self::END_RTL, + ], '', $result); + + // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>' + // LTR text: <span dir="ltr"> text </span> + // RTL text: <span dir="rtl"> text </span> + + $result = str_replace([ + self::START_LTR, + self::END_LTR, + self::START_RTL, + self::END_RTL, + ], [ + '<span dir="ltr">', + '</span>', + '<span dir="rtl">', + '</span>', + ], $result); + + return $result; + } + + /** + * Wrap words that have an asterisk suffix in <u> and </u> tags. + * This should underline starred names to show the preferred name. 
+ * + * @param string $textSpan + * @param string $direction + * + * @return string + */ + private static function starredName(string $textSpan, string $direction): string + { + // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags + // only when page and span directions are identical. + if ($direction === strtoupper(I18N::direction())) { + while (true) { + $starPos = strpos($textSpan, '*'); + if ($starPos === false) { + break; + } + $trailingText = substr($textSpan, $starPos + 1); + $textSpan = substr($textSpan, 0, $starPos); + $wordStart = strrpos($textSpan, ' '); // Find the start of the word + if ($wordStart !== false) { + $leadingText = substr($textSpan, 0, $wordStart + 1); + $wordText = substr($textSpan, $wordStart + 1); + } else { + $leadingText = ''; + $wordText = $textSpan; + } + $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText; + } + $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan); + // The is a work-around for a TCPDF bug eating blanks. 
+ $textSpan = str_replace([ + ' <u>', + '</u> ', + ], [ + ' <u>', + '</u> ', + ], $textSpan); + } else { + // Text and page directions differ: remove the <span> and </span> + $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan); + $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan); + } + + return $textSpan; + } + + /** + * Get the next character from an input string + * + * @param string $text + * @param int $offset + * + * @return array{'letter':string,'length':int} + */ + private static function getChar(string $text, int $offset): array + { + if ($text === '') { + return [ + 'letter' => '', + 'length' => 0, + ]; + } + + $char = substr($text, $offset, 1); + $length = 1; + if ((ord($char) & 0xE0) === 0xC0) { + $length = 2; + } + if ((ord($char) & 0xF0) === 0xE0) { + $length = 3; + } + if ((ord($char) & 0xF8) === 0xF0) { + $length = 4; + } + $letter = substr($text, $offset, $length); + + return [ + 'letter' => $letter, + 'length' => $length, + ]; + } + + /** + * Insert <br> into current span + * + * @param string $result + * + * @return void + */ + private static function breakCurrentSpan(string &$result): void + { + // Interrupt the current span, insert that <br>, and then continue the current span + $result .= self::$waitingText; + self::$waitingText = ''; + + $breakString = '<' . self::$currentState . 
'br>'; + $result .= $breakString; + } + + /** + * Begin current span + * + * @param string $result + * + * @return void + */ + private static function beginCurrentSpan(string &$result): void + { + if (self::$currentState === 'LTR') { + $result .= self::START_LTR; + } + if (self::$currentState === 'RTL') { + $result .= self::START_RTL; + } + + self::$posSpanStart = strlen($result); + } + + /** + * Finish current span + * + * @param string $result + * @param bool $theEnd + * + * @return void + */ + private static function finishCurrentSpan(string &$result, bool $theEnd = false): void + { + $textSpan = substr($result, self::$posSpanStart); + $result = substr($result, 0, self::$posSpanStart); + + // Get rid of empty spans, so that our check for presence of RTL will work + $result = str_replace([ + self::START_LTR . self::END_LTR, + self::START_RTL . self::END_RTL, + ], '', $result); + + // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers. + $tempResult = ''; + while ($textSpan !== '') { + $posColon = strpos($textSpan, ':'); + if ($posColon === false) { + break; + } // No more possible time strings + $posLRE = strpos($textSpan, self::UTF8_LRE); + if ($posLRE === false) { + break; + } // No more numeric strings + $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE); + if ($posPDF === false) { + break; + } // No more numeric strings + + $tempResult .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string + $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string + $textSpan = substr($textSpan, $posPDF + 3); + $posColon = strpos($numericString, ':'); + if ($posColon === false) { + // Nothing that looks like a time here + $tempResult .= $numericString; + continue; + } + $posBlank = strpos($numericString . ' ', ' '); + $posNbsp = strpos($numericString . 
' ', ' '); + if ($posBlank < $posNbsp) { + $posSeparator = $posBlank; + $lengthSeparator = 1; + } else { + $posSeparator = $posNbsp; + $lengthSeparator = 6; + } + if ($posColon > $posSeparator) { + // We have a time string preceded by a blank: Exclude that blank from the numeric string + $tempResult .= substr($numericString, 0, $posSeparator); + $tempResult .= self::UTF8_PDF; + $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); + $tempResult .= self::UTF8_LRE; + $numericString = substr($numericString, $posSeparator + $lengthSeparator); + } + + $posBlank = strpos($numericString, ' '); + $posNbsp = strpos($numericString, ' '); + if ($posBlank === false && $posNbsp === false) { + // The time string isn't followed by a blank + $textSpan = $numericString . $textSpan; + continue; + } + + // We have a time string followed by a blank: Exclude that blank from the numeric string + if ($posBlank === false) { + $posSeparator = $posNbsp; + $lengthSeparator = 6; + } elseif ($posNbsp === false) { + $posSeparator = $posBlank; + $lengthSeparator = 1; + } elseif ($posBlank < $posNbsp) { + $posSeparator = $posBlank; + $lengthSeparator = 1; + } else { + $posSeparator = $posNbsp; + $lengthSeparator = 6; + } + $tempResult .= substr($numericString, 0, $posSeparator); + $tempResult .= self::UTF8_PDF; + $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); + $posSeparator += $lengthSeparator; + $numericString = substr($numericString, $posSeparator); + $textSpan = self::UTF8_LRE . $numericString . $textSpan; + } + $textSpan = $tempResult . $textSpan; + $trailingBlanks = ''; + $trailingBreaks = ''; + + /* ****************************** LTR text handling ******************************** */ + + if (self::$currentState === 'LTR') { + // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too. 
+ if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) { + $trailingString = ''; + $savedSpan = $textSpan; + while ($textSpan !== '') { + // Look for trailing spaces and tentatively move them + if (substr($textSpan, -1) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -3) !== self::UTF8_PDF) { + // There is no trailing numeric string + $textSpan = $savedSpan; + break; + } + + // We have a numeric string + $posStartNumber = strrpos($textSpan, self::UTF8_LRE); + if ($posStartNumber === false) { + $posStartNumber = 0; + } + $trailingString = substr($textSpan, $posStartNumber) . $trailingString; + $textSpan = substr($textSpan, 0, $posStartNumber); + + // Look for more spaces and move them too + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + break; + } + + self::$waitingText = $trailingString . self::$waitingText; + break; + } + } + + $savedSpan = $textSpan; + // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingBlanks = ' ' . $trailingBlanks; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr('......' . $textSpan, -6) === ' ') { + $trailingBlanks = ' ' . $trailingBlanks; + $textSpan = substr($textSpan, 0, -6); + continue; + } + break; + } + while (substr($textSpan, -7) === '<LTRbr>') { + $trailingBreaks = '<br>' . 
$trailingBreaks; // Plain <br> because it’s outside a span + $textSpan = substr($textSpan, 0, -7); + } + if ($trailingBreaks !== '') { + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingBreaks = ' ' . $trailingBreaks; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingBreaks = ' ' . $trailingBreaks; + $textSpan = substr($textSpan, 0, -6); + continue; + } + break; + } + self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span + } else { + $textSpan = $savedSpan; + } + + $trailingBlanks = ''; + $trailingPunctuation = ''; + $trailingID = ''; + $trailingSeparator = ''; + $leadingSeparator = ''; + + while (I18N::direction() === 'rtl') { + if (str_contains($result, self::START_RTL)) { + // Remove trailing blanks for inclusion in a separate LTR span + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingBlanks = ' ' . $trailingBlanks; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingBlanks = ' ' . 
$trailingBlanks; + $textSpan = substr($textSpan, 0, -1); + continue; + } + break; + } + + // Remove trailing punctuation for inclusion in a separate LTR span + if ($textSpan === '') { + $trailingChar = "\n"; + } else { + $trailingChar = substr($textSpan, -1); + } + if (str_contains(self::PUNCTUATION, $trailingChar)) { + $trailingPunctuation = $trailingChar; + $textSpan = substr($textSpan, 0, -1); + } + } + + // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span + while (true) { + if (substr($textSpan, -1) !== ')') { + break; + } // There is no trailing ')' + $posLeftParen = strrpos($textSpan, '('); + if ($posLeftParen === false) { + break; + } // There is no leading '(' + $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes + + // If the parenthesized text doesn't look like an ID number, + // we don't want to touch it. + // This check won’t work if somebody uses ID numbers with an unusual format. + $offset = 1; + $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text + if (str_contains(self::NUMBERS, $charArray['letter'])) { + break; + } + $offset += $charArray['length']; // Point at 2nd character of parenthesized text + if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) { + break; + } + // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too + if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) { + break; + } + + $trailingID = substr($textSpan, $posLeftParen); + $textSpan = substr($textSpan, 0, $posLeftParen); + break; + } + + // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span + if ($trailingID !== '') { + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingSeparator = ' ' . $trailingSeparator; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingSeparator = ' ' . 
$trailingSeparator; + $textSpan = substr($textSpan, 0, -6); + continue; + } + if (substr($textSpan, -1) === '-') { + $trailingSeparator = '-' . $trailingSeparator; + $textSpan = substr($textSpan, 0, -1); + continue; + } + break; + } + } + + // Look for " - " preceding the text and remove it for inclusion in a separate LTR span + $foundSeparator = false; + $savedSpan = $textSpan; + while ($textSpan !== '') { + if (substr($textSpan, 0, 1) === ' ') { + $leadingSeparator = ' ' . $leadingSeparator; + $textSpan = substr($textSpan, 1); + continue; + } + if (substr($textSpan, 0, 6) === ' ') { + $leadingSeparator = ' ' . $leadingSeparator; + $textSpan = substr($textSpan, 6); + continue; + } + if (substr($textSpan, 0, 1) === '-') { + $leadingSeparator = '-' . $leadingSeparator; + $textSpan = substr($textSpan, 1); + $foundSeparator = true; + continue; + } + break; + } + if (!$foundSeparator) { + $textSpan = $savedSpan; + $leadingSeparator = ''; + } + break; + } + + // We're done: finish the span + $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags + while (true) { + // Remove blanks that precede <LTRbr> + if (str_contains($textSpan, ' <LTRbr>')) { + $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); + continue; + } + if (str_contains($textSpan, ' <LTRbr>')) { + $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); + continue; + } + break; + } + if ($leadingSeparator !== '') { + $result .= self::START_LTR . $leadingSeparator . self::END_LTR; + } + $result .= $textSpan . self::END_LTR; + if ($trailingSeparator !== '') { + $result .= self::START_LTR . $trailingSeparator . self::END_LTR; + } + if ($trailingID !== '') { + $result .= self::START_LTR . $trailingID . self::END_LTR; + } + if ($trailingPunctuation !== '') { + $result .= self::START_LTR . $trailingPunctuation . self::END_LTR; + } + if ($trailingBlanks !== '') { + $result .= self::START_LTR . $trailingBlanks . 
self::END_LTR; + } + } + + /* ****************************** RTL text handling ******************************** */ + + if (self::$currentState === 'RTL') { + $savedSpan = $textSpan; + + // Move any trailing <br>, optionally followed by blanks, outside this RTL span + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingBlanks = ' ' . $trailingBlanks; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr('......' . $textSpan, -6) === ' ') { + $trailingBlanks = ' ' . $trailingBlanks; + $textSpan = substr($textSpan, 0, -6); + continue; + } + break; + } + while (substr($textSpan, -7) === '<RTLbr>') { + $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span + $textSpan = substr($textSpan, 0, -7); + } + if ($trailingBreaks !== '') { + self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span + } else { + $textSpan = $savedSpan; + } + + // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too. + if (!$theEnd && I18N::direction() !== 'rtl') { + $trailingString = ''; + $savedSpan = $textSpan; + while ($textSpan !== '') { + // Look for trailing spaces and tentatively move them + if (substr($textSpan, -1) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -3) !== self::UTF8_PDF) { + // There is no trailing numeric string + $textSpan = $savedSpan; + break; + } + + // We have a numeric string + $posStartNumber = strrpos($textSpan, self::UTF8_LRE); + if ($posStartNumber === false) { + $posStartNumber = 0; + } + $trailingString = substr($textSpan, $posStartNumber) . 
$trailingString; + $textSpan = substr($textSpan, 0, $posStartNumber); + + // Look for more spaces and move them too + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $trailingString = ' ' . $trailingString; + $textSpan = substr($textSpan, 0, -1); + continue; + } + break; + } + + self::$waitingText = $trailingString . self::$waitingText; + break; + } + } + + // Trailing " - " needs to be prefixed to the following span + if (!$theEnd && substr('...' . $textSpan, -3) === ' - ') { + $textSpan = substr($textSpan, 0, -3); + self::$waitingText = ' - ' . self::$waitingText; + } + + while (I18N::direction() === 'rtl') { + // Look for " - " preceding <RTLbr> and relocate it to the front of the string + $posDashString = strpos($textSpan, ' - <RTLbr>'); + if ($posDashString === false) { + break; + } + $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); + if ($posStringStart === false) { + $posStringStart = 0; + } else { + $posStringStart += 9; + } // Point to the first char following the last <RTLbr> + + $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . 
substr($textSpan, $posDashString + 3); + } + + // Strip leading spaces from the RTL text + $countLeadingSpaces = 0; + while ($textSpan !== '') { + if (substr($textSpan, 0, 1) === ' ') { + $countLeadingSpaces++; + $textSpan = substr($textSpan, 1); + continue; + } + if (substr($textSpan, 0, 6) === ' ') { + $countLeadingSpaces++; + $textSpan = substr($textSpan, 6); + continue; + } + break; + } + + // Strip trailing spaces from the RTL text + $countTrailingSpaces = 0; + while ($textSpan !== '') { + if (substr($textSpan, -1) === ' ') { + $countTrailingSpaces++; + $textSpan = substr($textSpan, 0, -1); + continue; + } + if (substr($textSpan, -6) === ' ') { + $countTrailingSpaces++; + $textSpan = substr($textSpan, 0, -6); + continue; + } + break; + } + + // Look for trailing " -", reverse it, and relocate it to the front of the string + if (substr($textSpan, -2) === ' -') { + $posDashString = strlen($textSpan) - 2; + $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); + if ($posStringStart === false) { + $posStringStart = 0; + } else { + $posStringStart += 9; + } // Point to the first char following the last <RTLbr> + + $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2); + } + + if ($countLeadingSpaces !== 0) { + $newLength = strlen($textSpan) + $countLeadingSpaces; + $textSpan = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? 
STR_PAD_LEFT : STR_PAD_RIGHT); + } + if ($countTrailingSpaces !== 0) { + if (I18N::direction() === 'ltr') { + if ($trailingBreaks === '') { + // Move trailing RTL spaces to front of following LTR span + $newLength = strlen(self::$waitingText) + $countTrailingSpaces; + self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT); + } + } else { + $newLength = strlen($textSpan) + $countTrailingSpaces; + $textSpan = str_pad($textSpan, $newLength); + } + } + + // We're done: finish the span + $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags + $result .= $textSpan . self::END_RTL; + } + + if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') { + $result .= $textSpan; + } + + $result .= $trailingBreaks; // Get rid of any waiting <br> + } +} |
