diff --git a/composer.json b/composer.json index e696c132..92a29565 100644 --- a/composer.json +++ b/composer.json @@ -79,7 +79,11 @@ "ext-imap": "*", "tatevikgr/rss-feed": "dev-main", "ext-pdo": "*", - "ezyang/htmlpurifier": "^4.19" + "ezyang/htmlpurifier": "^4.19", + "ext-libxml": "*", + "ext-gd": "*", + "ext-curl": "*", + "ext-fileinfo": "*" }, "require-dev": { "phpunit/phpunit": "^9.5", diff --git a/config/parameters.yml.dist b/config/parameters.yml.dist index 41c9a20b..fb44d2ea 100644 --- a/config/parameters.yml.dist +++ b/config/parameters.yml.dist @@ -25,12 +25,18 @@ parameters: env(DATABASE_PREFIX): 'phplist_' list_table_prefix: '%%env(LIST_TABLE_PREFIX)%%' env(LIST_TABLE_PREFIX): 'listattr_' + app.dev_version: '%%env(APP_DEV_VERSION)%%' + env(APP_DEV_VERSION): 0 + app.dev_email: '%%env(APP_DEV_EMAIL)%%' + env(APP_DEV_EMAIL): 'dev@dev.com' + app.powered_by_phplist: '%%env(APP_POWERED_BY_PHPLIST)%%' + env(APP_POWERED_BY_PHPLIST): 0 # Email configuration app.mailer_from: '%%env(MAILER_FROM)%%' env(MAILER_FROM): 'noreply@phplist.com' app.mailer_dsn: '%%env(MAILER_DSN)%%' - env(MAILER_DSN): 'null://null' + env(MAILER_DSN): 'null://null' # set local_domain on transport app.confirmation_url: '%%env(CONFIRMATION_URL)%%' env(CONFIRMATION_URL): 'https://example.com/subscriber/confirm/' app.subscription_confirmation_url: '%%env(SUBSCRIPTION_CONFIRMATION_URL)%%' @@ -89,3 +95,30 @@ parameters: env(MESSAGING_MAX_PROCESS_TIME): '600' messaging.max_mail_size: '%%env(MAX_MAILSIZE)%%' env(MAX_MAILSIZE): '209715200' + messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%' + env(DEFAULT_MESSAGEAGE): '691200' + messaging.use_manual_text_part: '%%env(USE_MANUAL_TEXT_PART)%%' + env(USE_MANUAL_TEXT_PART): 0 + messaging.blacklist_grace_time: '%%env(MESSAGING_BLACKLIST_GRACE_TIME)%%' + env(MESSAGING_BLACKLIST_GRACE_TIME): + messaging.google_sender_id: '%%env(GOOGLE_SENDERID)%%' + env(GOOGLE_SENDERID): '' + messaging.use_amazon_ses: '%%env(USE_AMAZONSES)%%' + env(USE_AMAZONSES): 0 + messaging.embed_external_images: '%%env(EMBEDEXTERNALIMAGES)%%' + env(EMBEDEXTERNALIMAGES): 0 + messaging.embed_uploaded_images: '%%env(EMBEDUPLOADIMAGES)%%' + env(EMBEDUPLOADIMAGES): 0 + messaging.external_image_max_age: '%%env(EXTERNALIMAGE_MAXAGE)%%' + env(EXTERNALIMAGE_MAXAGE): 0 + messaging.external_image_timeout: '%%env(EXTERNALIMAGE_TIMEOUT)%%' + env(EXTERNALIMAGE_TIMEOUT): 30 + messaging.external_image_max_size: '%%env(EXTERNALIMAGE_MAXSIZE)%%' + env(EXTERNALIMAGE_MAXSIZE): 2048 + + phplist.upload_images_dir: '%%env(PHPLIST_UPLOADIMAGES_DIR)%%' + env(PHPLIST_UPLOADIMAGES_DIR): 'images' + phplist.editor_images_dir: '%%env(FCKIMAGES_DIR)%%' + env(FCKIMAGES_DIR): 'uploadimages' + phplist.public_schema: '%%env(PUBLIC_SCHEMA)%%' + env(PUBLIC_SCHEMA): 'http' diff --git a/src/Domain/Analytics/Service/LinkTrackService.php b/src/Domain/Analytics/Service/LinkTrackService.php index 902092f6..0b3a8c5e 100644 --- a/src/Domain/Analytics/Service/LinkTrackService.php +++ b/src/Domain/Analytics/Service/LinkTrackService.php @@ -8,8 +8,7 @@ use PhpList\Core\Domain\Analytics\Exception\MissingMessageIdException; use PhpList\Core\Domain\Analytics\Model\LinkTrack; use PhpList\Core\Domain\Analytics\Repository\LinkTrackRepository; -use PhpList\Core\Domain\Messaging\Model\Message; -use PhpList\Core\Domain\Messaging\Model\Message\MessageContent; +use PhpList\Core\Domain\Messaging\Model\Dto\MessagePrecacheDto; class LinkTrackService { @@ -39,7 +38,7 @@ public function isExtractAndSaveLinksApplicable(): bool * @return LinkTrack[] The saved LinkTrack entities * @throws MissingMessageIdException */ - public function extractAndSaveLinks(MessageContent $content, int $userId, ?int $messageId = null): array + public function extractAndSaveLinks(MessagePrecacheDto $content, int $userId, ?int $messageId = null): array { if (!$this->isExtractAndSaveLinksApplicable()) { return []; @@ -49,10 +48,10 @@ public function extractAndSaveLinks(MessageContent $content, int $userId, ?int $ throw new MissingMessageIdException(); } - $links = $this->extractLinksFromHtml($content->getText() ?? ''); + $links = $this->extractLinksFromHtml($content->content ?? ''); - if ($content->getFooter() !== null) { - $links = array_merge($links, $this->extractLinksFromHtml($content->getFooter())); + if ($content->htmlFooter) { + $links = array_merge($links, $this->extractLinksFromHtml($content->htmlFooter)); } $links = array_unique($links); diff --git a/src/Domain/Common/ExternalImageService.php b/src/Domain/Common/ExternalImageService.php new file mode 100644 index 00000000..e4e4086a --- /dev/null +++ b/src/Domain/Common/ExternalImageService.php @@ -0,0 +1,177 @@ +externalCacheDir = $this->tempDir . '/external_cache'; + } + + public function getFromCache(string $filename, int $messageId): ?string + { + $cacheFile = $this->generateLocalFileName($filename, $messageId); + + if (!is_file($cacheFile) || filesize($cacheFile) <= 64) { + return null; + } + + $content = file_get_contents($cacheFile); + if ($content === false) { + return null; + } + + return base64_encode($content); + } + + public function cache($filename, $messageId): bool + { + if ( + !(str_starts_with($filename, 'http')) + || str_contains($filename, '://' . $this->configProvider->getValue(ConfigOption::Website) . '/') + ) { + return false; + } + + if (!file_exists($this->externalCacheDir)) { + @mkdir($this->externalCacheDir); + } + + if (!file_exists($this->externalCacheDir) || !is_writable($this->externalCacheDir)) { + return false; + } + + $this->removeOldFilesInCache(); + + $cacheFileName = $this->generateLocalFileName($filename, $messageId); + + if (!file_exists($cacheFileName)) { + $cacheFileContent = null; + + if (function_exists('curl_init')) { + $cacheFileContent = $this->downloadUsingCurl($filename); + } + + if ($cacheFileContent === null) { + $cacheFileContent = $this->downloadUsingFileGetContent($filename); + } + + if ($this->externalImageMaxSize && (strlen($cacheFileContent) > $this->externalImageMaxSize)) { + $cacheFileContent = 'MAX_SIZE'; + } + + $cacheFileHandle = @fopen($cacheFileName, 'wb'); + if ($cacheFileHandle !== false) { + if (flock($cacheFileHandle, LOCK_EX)) { + fwrite($cacheFileHandle, $cacheFileContent); + fflush($cacheFileHandle); + flock($cacheFileHandle, LOCK_UN); + } + fclose($cacheFileHandle); + } + } + + if (file_exists($cacheFileName) && (@filesize($cacheFileName) > 64)) { + return true; + } + + return false; + } + + private function removeOldFilesInCache(): void + { + $extCacheDirHandle = @opendir($this->externalCacheDir); + if (!$this->externalImageMaxAge || !$extCacheDirHandle) { + return; + } + + while (($cacheFile = @readdir($extCacheDirHandle)) !== false) { + // todo: make sure that this is what we need + if (!str_starts_with($cacheFile, '.')) { + $cacheFileMTime = @filemtime($this->externalCacheDir.'/'.$cacheFile); + + if ( + is_numeric($cacheFileMTime) + && ($cacheFileMTime > 0) + && ((time() - $cacheFileMTime) > $this->externalImageMaxAge) + ) { + @unlink($this->externalCacheDir.'/'.$cacheFile); + } + } + } + + @closedir($extCacheDirHandle); + } + + private function generateLocalFileName(string $filename, int $messageId): string + { + return $this->externalCacheDir + . '/' + . $messageId + . '_' + . preg_replace([ '~[\.][\.]+~Ui', '~[^\w\.]~Ui',], ['', '_'], $filename); + } + + private function downloadUsingCurl(string $filename): ?string + { + $cURLHandle = curl_init($filename); + + if ($cURLHandle !== false) { + curl_setopt($cURLHandle, CURLOPT_HTTPGET, true); + curl_setopt($cURLHandle, CURLOPT_HEADER, 0); + curl_setopt($cURLHandle, CURLOPT_RETURNTRANSFER, true); + curl_setopt($cURLHandle, CURLOPT_TIMEOUT, $this->externalImageTimeout); + curl_setopt($cURLHandle, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($cURLHandle, CURLOPT_MAXREDIRS, 10); + curl_setopt($cURLHandle, CURLOPT_SSL_VERIFYPEER, false); + curl_setopt($cURLHandle, CURLOPT_FAILONERROR, true); + + $cacheFileContent = curl_exec($cURLHandle); + + $cURLErrNo = curl_errno($cURLHandle); + $cURLInfo = curl_getinfo($cURLHandle); + + curl_close($cURLHandle); + + if ($cURLErrNo != 0) { + $cacheFileContent = 'CURL_ERROR_' . $cURLErrNo; + } + if ($cURLInfo['http_code'] >= 400) { + $cacheFileContent = 'HTTP_CODE_' . $cURLInfo['http_code']; + } + } + + return $cacheFileContent ?? null; + } + + private function downloadUsingFileGetContent(string $filename): string + { + $remoteURLContext = stream_context_create([ + 'http' => [ + 'method' => 'GET', + 'timeout' => $this->externalImageTimeout, + 'max_redirects' => '10', + ] + ]); + + $cacheFileContent = file_get_contents($filename, false, $remoteURLContext); + if ($cacheFileContent === false) { + $cacheFileContent = 'FGC_ERROR'; + } + + return $cacheFileContent; + } +} diff --git a/src/Domain/Common/Html2Text.php b/src/Domain/Common/Html2Text.php new file mode 100644 index 00000000..93705748 --- /dev/null +++ b/src/Domain/Common/Html2Text.php @@ -0,0 +1,85 @@ +]*>(.*?)<\/script\s*>/is", '', $text); + $text = preg_replace("/]*>(.*?)<\/style\s*>/is", '', $text); + + $text = preg_replace( + "/]*href=([\"\'])(.*)\\1[^>]*>(.*)<\/a>/Umis", + "[URLTEXT]\\3[ENDURLTEXT][LINK]\\2[ENDLINK]\n", + $text + ); + $text = preg_replace("/(.*?)<\/b\s*>/is", '*\\1*', $text); + $text = preg_replace("/(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text); + $text = preg_replace("/(.*?)<\/i\s*>/is", '/\\1/', $text); + $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text); + $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text); + $text = preg_replace('/]*?>/i', "
\n", $text); + $text = preg_replace("/]*?\/>/i", "\n", $text); + $text = preg_replace('/ $fullmatch) { + $linktext = $links[1][$matchindex]; + $linkurl = $links[2][$matchindex]; + // check if the text linked is a repetition of the URL + if (trim($linktext) == trim($linkurl) || + 'https://'.trim($linktext) == trim($linkurl) || + 'http://'.trim($linktext) == trim($linkurl) + ) { + $linkreplace = $linkurl; + } else { + //# if link is an anchor only, take it out + if (strpos($linkurl, '#') === 0) { + $linkreplace = $linktext; + } else { + $linkreplace = $linktext.' <'.$linkurl.'>'; + } + } + $text = str_replace($fullmatch, $linkreplace, $text); + } + $text = preg_replace( + "/]*>(.*?)<\/a>/is", + '[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]', + $text, + 500 + ); + + $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); + + $text = preg_replace('/###NL###/', "\n", $text); + $text = preg_replace("/\n /", "\n", $text); + $text = preg_replace("/\t/", ' ', $text); + + // reduce whitespace + while (preg_match('/ /', $text)) { + $text = preg_replace('/ /', ' ', $text); + } + while (preg_match("/\n\s*\n\s*\n/", $text)) { + $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text); + } + $ww = $this->configProvider->getValue(ConfigOption::WordWrap) ?? self::WORD_WRAP; + + return wordwrap($text, $ww); + } +} diff --git a/src/Domain/Common/HtmlUrlRewriter.php b/src/Domain/Common/HtmlUrlRewriter.php new file mode 100644 index 00000000..bba0b46e --- /dev/null +++ b/src/Domain/Common/HtmlUrlRewriter.php @@ -0,0 +1,190 @@ +
' . $html . '
'; + $dom->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + + $xpath = new DOMXPath($dom); + + // Attributes to rewrite + $attrMap = [ + '//*[@src]' => 'src', + '//*[@href]' => 'href', + '//*[@action]' => 'action', + '//*[@background]' => 'background', + ]; + + foreach ($attrMap as $query => $attr) { + foreach ($xpath->query($query) as $node) { + /** @var DOMElement $node */ + $val = $node->getAttribute($attr); + $node->setAttribute($attr, $this->absolutizeUrl($val, $baseUrl)); + } + } + + // srcset needs special handling (multiple candidates) + foreach ($xpath->query('//*[@srcset]') as $node) { + /** @var DOMElement $node */ + $node->setAttribute('srcset', $this->rewriteSrcset($node->getAttribute('srcset'), $baseUrl)); + } + + // 2) Rewrite inline