. */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Site;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use League\Flysystem\Filesystem;
use League\Flysystem\FilesystemOperator;
use Psr\Http\Message\ResponseFactoryInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamFactoryInterface;
use RuntimeException;
use ZipArchive;

use function addcslashes;
use function date;
use function explode;
use function fclose;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_match_all;
use function rewind;
use function stream_copy_to_stream;
use function stream_filter_append;
use function stream_get_contents;
use function stream_get_meta_data;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function sys_get_temp_dir;
use function tempnam;
use function tmpfile;

use const PATHINFO_EXTENSION;
use const PREG_SET_ORDER;
use const STREAM_FILTER_WRITE;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    // Maps the "privacy" option of a download request onto an access level.
    private const array ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    public function __construct(
        private readonly ResponseFactoryInterface $response_factory,
        private readonly StreamFactoryInterface $stream_factory,
    ) {
    }

    /**
     * Build an HTTP download response containing either a plain GEDCOM file or a ZIP archive.
     *
     * @param Tree            $tree         Export data from this tree
     * @param bool            $sort_by_xref Write GEDCOM records in XREF order
     * @param string          $encoding     Convert from UTF-8 to other encoding
     * @param string          $privacy      Filter records by role (a key of self::ACCESS_LEVELS)
     * @param string          $line_endings CRLF or LF
     * @param string          $filename     Name of download file, without an extension
     * @param string          $format       One of: gedcom, zip, zipmedia, gedzip
     * @param Collection|null $records      Just export these records
     *
     * @return ResponseInterface
     *
     * @throws RuntimeException If the temporary ZIP file or GEDCOM stream cannot be created or read
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        Collection|null $records = null
    ): ResponseInterface {
        $access_level = self::ACCESS_LEVELS[$privacy];

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Create a new/empty .ZIP file.
        // The handle from tmpfile() is not kept; only its path is used, and ZipArchive (re)creates the file there.
        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
        $zip_filesystem = new ZipArchive();

        if ($zip_filesystem->open($temp_zip_file, ZipArchive::CREATE | ZipArchive::OVERWRITE) !== true) {
            throw new RuntimeException('Failed to create temporary ZIP file');
        }

        if ($format === 'zipmedia') {
            $media_path = $tree->mediaFolder();
        } elseif ($format === 'gedzip') {
            $media_path = '';
        } else {
            // Don't add media
            $media_path = null;
        }

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        $gedcom_data = stream_get_contents($resource);
        fclose($resource);

        if ($gedcom_data === false) {
            throw new RuntimeException('Cannot read temporary stream');
        }

        if ($format === 'gedzip') {
            // This format uses a fixed name for the GEDCOM file inside the archive.
            $gedcom_name = 'gedcom.ged';
            $extension   = '.gdz';
        } else {
            $gedcom_name = $filename . '.ged';
            $extension   = '.zip';
        }

        $zip_filesystem->addFromString($gedcom_name, $gedcom_data);
        $zip_filesystem->close();

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * @param Tree                                 $tree           Export data from this tree
     * @param bool                                 $sort_by_xref   Write GEDCOM records in XREF order
     * @param string                               $encoding       Convert from UTF-8 to other encoding
     * @param int                                  $access_level   Apply privacy filtering
     * @param string                               $line_endings   CRLF or LF
     * @param Collection|null                      $records        Just export these records
     * @param ZipArchive|FilesystemOperator|null   $zip_filesystem Write media files to this filesystem
     * @param string|null                          $media_path     Location within the zip filesystem
     *
     * @return resource A rewound in-memory stream containing the exported data
     *
     * @throws RuntimeException If a stream or temporary file cannot be created or written
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        Collection|null $records = null,
        ZipArchive|FilesystemOperator|null $zip_filesystem = null,
        string|null $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false, $access_level)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // This access level takes the raw GEDCOM text straight from the database rows,
            // so we can iterate with cursors instead of creating GEDCOM record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true, $access_level)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true, $access_level)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = $tree->mediaFilesystem();

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);

                    // Nothing left after privacy filtering - omit the record.
                    if ($gedcom === '') {
                        continue;
                    }
                } else {
                    // A database row - only one of these columns will be present.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom ??
                        '';
                }

                // Copy the files of media objects into the archive.
                if ($media_path !== null && preg_match('/^0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if (!$media_filesystem->fileExists($media_file)) {
                            continue;
                        }

                        // Test against the interface declared in the signature, not one concrete
                        // implementation, so that every FilesystemOperator receives the media files.
                        if ($zip_filesystem instanceof FilesystemOperator) {
                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
                        } elseif ($zip_filesystem instanceof ZipArchive) {
                            $this->addStreamToZipArchive(
                                $zip_filesystem,
                                $media_filesystem->readStream($media_file),
                                $media_path . $media_file
                            );
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a GEDCOM header record for an export.
     *
     * @param Tree   $tree         Export data from this tree
     * @param string $encoding     The encoding of the export; written to the CHAR field
     * @param bool   $include_sub  Also copy SUBM/SUBN from the tree's existing header
     * @param int    $access_level Apply privacy filtering to the copied header facts
     *
     * @return string The header record, with lines separated by "\n" and no trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub, int $access_level): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // These encodings are written to the CHAR field under a different name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        // There should always be a header record.
        if ($header instanceof Header) {
            foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE'], false, $access_level) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }

            if ($include_sub) {
                foreach ($header->facts(['SUBM', 'SUBN'], false, $access_level) as $fact) {
                    $gedcom .= "\n" . $fact->gedcom();
                }
            }
        }

        return $gedcom;
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * @param string $gedcom          One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length Maximum length of a line, in characters
     *
     * @return string The same data, with over-long lines split into CONC lines
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // The continuation of a CONT line stays at the same level as the CONT line;
                // for any other tag it is one level deeper.
                if ($tag !== 'CONT') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // $pos counts characters, so the space must also be located by character (not byte) offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Copy a stream to a temporary file and register that file as an (uncompressed) archive entry.
     *
     * NOTE(review): the temporary file is not deleted here. ZipArchive::addFile() only registers
     * the path and the data is read when the archive is closed, so the file must outlive this call.
     * Nothing visible in this class removes it afterwards - confirm whether cleanup is needed.
     *
     * @param ZipArchive $zip_archive Add the entry to this archive
     * @param resource   $source      Read the entry's data from this stream; it is closed before returning
     * @param string     $entry_name  Name of the entry within the archive
     *
     * @throws RuntimeException If the temporary file cannot be created or written
     */
    private function addStreamToZipArchive(ZipArchive $zip_archive, $source, string $entry_name): void
    {
        try {
            $tmpfile = tempnam(sys_get_temp_dir(), 'wt-zip-');

            if ($tmpfile === false) {
                throw new RuntimeException('Failed to create temporary file');
            }

            $destination = fopen($tmpfile, 'wb+');

            if ($destination === false) {
                throw new RuntimeException('Failed to open temporary file');
            }

            $bytes_copied = stream_copy_to_stream($source, $destination);

            // Close (and flush) the temporary file before the archive is told about it.
            fclose($destination);

            if ($bytes_copied === false) {
                throw new RuntimeException('Unable to write to temporary file.  Perhaps the disk is full?');
            }
        } finally {
            fclose($source);
        }

        $zip_archive->addFile($tmpfile, $entry_name);

        // Media files are (almost always) already compressed.  Don't recompress them.
        $zip_archive->setCompressionName($entry_name, ZipArchive::CM_STORE);
    }

    /**
     * SQL expression for the length of an XREF column.
     * Sorting by length and then by value gives the order X1, X2, X10 rather than X1, X10, X2.
     *
     * @param string $column Name of the column - a fixed identifier, never user input
     *
     * @return Expression
     */
    private function xrefLengthExpression(string $column): Expression
    {
        // The Firebird driver uses CHAR_LENGTH() instead of LENGTH().
        $function = DB::driverName() === DB::FIREBIRD ? 'CHAR_LENGTH' : 'LENGTH';

        return new Expression($function . '(' . $column . ')');
    }

    /**
     * Query the GEDCOM text and XREF of each family in a tree.
     *
     * @param Tree $tree         Export data from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy($this->xrefLengthExpression('f_id'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of each individual in a tree.
     *
     * @param Tree $tree         Export data from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy($this->xrefLengthExpression('i_id'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of each source in a tree.
     *
     * @param Tree $tree         Export data from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy($this->xrefLengthExpression('s_id'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of each media object in a tree.
     *
     * @param Tree $tree         Export data from this tree
     * @param bool $sort_by_xref Order the rows by XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy($this->xrefLengthExpression('m_id'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of every other record in a tree, excluding the header and trailer.
     *
     * @param Tree $tree         Export data from this tree
     * @param bool $sort_by_xref Order the rows by record type, then by XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy($this->xrefLengthExpression('o_id'))
                ->orderBy('o_id');
        }

        return $query;
    }
}