-
Notifications
You must be signed in to change notification settings - Fork 31
Refactor CampaignProcessorMessageHandler #374
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a72d2e9
d40dedd
613a196
759d8e0
d94f825
3b9267f
5fc8637
69884a8
077bc63
492e1d0
109b07a
65c0030
7e9bab2
3dcb90a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,177 @@ | ||
| <?php | ||
|
|
||
| declare(strict_types=1); | ||
|
|
||
| namespace PhpList\Core\Domain\Common; | ||
|
|
||
| use PhpList\Core\Domain\Configuration\Model\ConfigOption; | ||
| use PhpList\Core\Domain\Configuration\Service\Provider\ConfigProvider; | ||
|
|
||
/**
 * Downloads remote images into a local cache directory and hands them back
 * base64-encoded.
 *
 * Cache files live in "<tempDir>/external_cache" and are named
 * "<messageId>_<sanitised URL>". A failed download is stored as a short marker
 * string ("CURL_ERROR_<n>", "HTTP_CODE_<n>", "FGC_ERROR", "MAX_SIZE"); because
 * cache() only downloads when no cache file exists yet, such a marker also stops
 * the same URL from being fetched again until the file is removed. Files of
 * MIN_IMAGE_SIZE bytes or fewer are therefore never treated as images.
 */
class ExternalImageService
{
    /** Cache files of this many bytes or fewer are treated as failure markers, not images. */
    private const MIN_IMAGE_SIZE = 64;

    private string $externalCacheDir;

    /**
     * @param string   $tempDir                Base directory; the cache is created in "<tempDir>/external_cache".
     * @param int      $externalImageMaxAge    Seconds after which cache files are deleted; 0 disables expiry.
     * @param int      $externalImageMaxSize   Maximum accepted download size in bytes; 0 disables the limit.
     * @param int|null $externalImageTimeout   Download timeout in seconds, passed to cURL and the stream context.
     * @param bool     $externalImageVerifySsl Whether cURL verifies the peer's TLS certificate. Only disable this
     *                                         for hosts with self-signed certificates that you trust.
     */
    public function __construct(
        private readonly ConfigProvider $configProvider,
        private readonly string $tempDir,
        private readonly int $externalImageMaxAge,
        private readonly int $externalImageMaxSize,
        private readonly ?int $externalImageTimeout = 30,
        private readonly bool $externalImageVerifySsl = true,
    ) {
        $this->externalCacheDir = $this->tempDir . '/external_cache';
    }

    /**
     * Returns the cached copy of an image, base64-encoded.
     *
     * @return string|null Null when there is no cache file, when it only holds a
     *                     failure marker (64 bytes or fewer), or when it cannot be read.
     */
    public function getFromCache(string $filename, int $messageId): ?string
    {
        $cacheFile = $this->generateLocalFileName($filename, $messageId);

        if (!is_file($cacheFile) || filesize($cacheFile) <= self::MIN_IMAGE_SIZE) {
            return null;
        }

        $content = file_get_contents($cacheFile);

        return $content === false ? null : base64_encode($content);
    }

    /**
     * Downloads an external image into the cache unless a cache file already exists.
     *
     * URLs that do not start with "http", or that contain "://<configured website>/",
     * are not cached.
     *
     * @param string $filename  URL of the image.
     * @param int    $messageId Message the image belongs to; becomes part of the cache file name.
     *
     * @return bool True when a cache file larger than 64 bytes exists afterwards.
     */
    public function cache($filename, $messageId): bool
    {
        if (
            !str_starts_with($filename, 'http')
            || str_contains($filename, '://' . $this->configProvider->getValue(ConfigOption::Website) . '/')
        ) {
            return false;
        }

        if (!$this->ensureCacheDirIsWritable()) {
            return false;
        }

        $this->removeOldFilesInCache();

        $cacheFileName = $this->generateLocalFileName($filename, $messageId);

        if (!file_exists($cacheFileName)) {
            $this->writeCacheFile($cacheFileName, $this->download($filename));
        }

        return file_exists($cacheFileName) && @filesize($cacheFileName) > self::MIN_IMAGE_SIZE;
    }

    /**
     * Creates the cache directory when it is missing and reports whether it can be written to.
     */
    private function ensureCacheDirIsWritable(): bool
    {
        if (!is_dir($this->externalCacheDir)) {
            // Recursive, so a missing temp dir does not block caching. Errors are
            // suppressed because another process may create the directory first;
            // the check below decides the outcome either way.
            @mkdir($this->externalCacheDir, 0777, true);
        }

        return is_dir($this->externalCacheDir) && is_writable($this->externalCacheDir);
    }

    /**
     * Fetches the URL, preferring cURL and falling back to file_get_contents()
     * when cURL is unavailable or yields nothing.
     *
     * @return string The body, or a failure marker ("MAX_SIZE" when the body exceeds the size limit).
     */
    private function download(string $url): string
    {
        $content = function_exists('curl_init') ? $this->downloadUsingCurl($url) : null;
        $content ??= $this->downloadUsingFileGetContent($url);

        if ($this->externalImageMaxSize && strlen($content) > $this->externalImageMaxSize) {
            return 'MAX_SIZE';
        }

        return $content;
    }

    /**
     * Writes the content to the cache file under an exclusive lock. Failures are
     * ignored; the caller checks the resulting file instead.
     */
    private function writeCacheFile(string $path, string $content): void
    {
        // Mode "c" opens without truncating, so the file is only emptied once
        // the exclusive lock is actually held.
        $handle = @fopen($path, 'cb');
        if ($handle === false) {
            return;
        }

        try {
            if (flock($handle, LOCK_EX)) {
                ftruncate($handle, 0);
                fwrite($handle, $content);
                fflush($handle);
                flock($handle, LOCK_UN);
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Deletes cache files whose modification time is older than the configured max age.
     */
    private function removeOldFilesInCache(): void
    {
        // A max age of 0 disables expiry. Check it before opening the directory
        // so that no handle is left unclosed on the early return.
        if (!$this->externalImageMaxAge) {
            return;
        }

        $directoryHandle = @opendir($this->externalCacheDir);
        if ($directoryHandle === false) {
            return;
        }

        $now = time();
        while (($entry = readdir($directoryHandle)) !== false) {
            // Skips ".", ".." and every other dot-file.
            // todo: make sure that this is what we need
            if (str_starts_with($entry, '.')) {
                continue;
            }

            $path = $this->externalCacheDir . '/' . $entry;
            $modifiedAt = @filemtime($path);

            if ($modifiedAt !== false && $modifiedAt > 0 && ($now - $modifiedAt) > $this->externalImageMaxAge) {
                @unlink($path);
            }
        }

        closedir($directoryHandle);
    }

    /**
     * Builds the cache path for a URL: runs of two or more dots are removed and
     * every character other than a word character or a dot becomes "_".
     */
    private function generateLocalFileName(string $filename, int $messageId): string
    {
        return $this->externalCacheDir
            . '/'
            . $messageId
            . '_'
            . preg_replace([ '~[\.][\.]+~Ui', '~[^\w\.]~Ui',], ['', '_'], $filename);
    }

    /**
     * Downloads the URL with cURL.
     *
     * @return string|null The body, a "HTTP_CODE_<n>" / "CURL_ERROR_<n>" marker on
     *                     failure, or null when cURL could not be initialised or
     *                     returned no body without reporting an error.
     */
    private function downloadUsingCurl(string $filename): ?string
    {
        $cURLHandle = curl_init($filename);
        if ($cURLHandle === false) {
            return null;
        }

        curl_setopt($cURLHandle, CURLOPT_HTTPGET, true);
        curl_setopt($cURLHandle, CURLOPT_HEADER, false);
        curl_setopt($cURLHandle, CURLOPT_RETURNTRANSFER, true);
        // A null timeout becomes 0, which libcurl treats as "no timeout".
        curl_setopt($cURLHandle, CURLOPT_TIMEOUT, (int) $this->externalImageTimeout);
        curl_setopt($cURLHandle, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($cURLHandle, CURLOPT_MAXREDIRS, 10);
        // Certificate verification stays on unless explicitly disabled:
        // turning it off lets a man-in-the-middle tamper with the download.
        curl_setopt($cURLHandle, CURLOPT_SSL_VERIFYPEER, $this->externalImageVerifySsl);
        curl_setopt($cURLHandle, CURLOPT_FAILONERROR, true);

        $body = curl_exec($cURLHandle);
        $errorNumber = curl_errno($cURLHandle);
        $httpCode = (int) curl_getinfo($cURLHandle, CURLINFO_HTTP_CODE);

        // The HTTP status marker takes precedence over the cURL error marker.
        if ($httpCode >= 400) {
            return 'HTTP_CODE_' . $httpCode;
        }
        if ($errorNumber !== 0) {
            return 'CURL_ERROR_' . $errorNumber;
        }

        // curl_exec() returns false on failure; never leak that through a ?string return.
        return is_string($body) ? $body : null;
    }

    /**
     * Downloads the URL through PHP's HTTP stream wrapper.
     *
     * @return string The body, or "FGC_ERROR" when the request failed.
     */
    private function downloadUsingFileGetContent(string $filename): string
    {
        $remoteURLContext = stream_context_create([
            'http' => [
                'method' => 'GET',
                'timeout' => $this->externalImageTimeout,
                'max_redirects' => '10',
            ],
        ]);

        $cacheFileContent = file_get_contents($filename, false, $remoteURLContext);

        return $cacheFileContent === false ? 'FGC_ERROR' : $cacheFileContent;
    }
}
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,85 @@ | ||||||||||||||
| <?php | ||||||||||||||
|
|
||||||||||||||
| declare(strict_types=1); | ||||||||||||||
|
|
||||||||||||||
| namespace PhpList\Core\Domain\Common; | ||||||||||||||
|
|
||||||||||||||
| use PhpList\Core\Domain\Configuration\Model\ConfigOption; | ||||||||||||||
| use PhpList\Core\Domain\Configuration\Service\Provider\ConfigProvider; | ||||||||||||||
|
|
||||||||||||||
/**
 * Converts an HTML fragment into a plain-text approximation.
 *
 * Script and style elements are dropped, links are rewritten as "text <url>",
 * <b>, <i> and headings get lightweight text markers, the remaining tags are
 * stripped, entities are decoded, whitespace is collapsed and the result is
 * word-wrapped.
 */
class Html2Text
{
    /** Wrap width used when no word-wrap value is configured. */
    private const WORD_WRAP = 70;

    public function __construct(private readonly ConfigProvider $configProvider)
    {
    }

    /**
     * @param string $html HTML to convert.
     *
     * @return string Plain text, wrapped at the configured width (default 70).
     */
    public function __invoke(string $html): string
    {
        $text = preg_replace("/\r/", '', $html);

        $text = preg_replace("/<script[^>]*>(.*?)<\/script\s*>/is", '', $text);
        $text = preg_replace("/<style[^>]*>(.*?)<\/style\s*>/is", '', $text);

        // Park links in placeholders so that text and URL survive strip_tags().
        $text = preg_replace(
            "/<a[^>]*href=([\"\'])(.*)\\1[^>]*>(.*)<\/a>/Umis",
            "[URLTEXT]\\3[ENDURLTEXT][LINK]\\2[ENDLINK]\n",
            $text
        );
        $text = preg_replace("/<b>(.*?)<\/b\s*>/is", '*\\1*', $text);
        $text = preg_replace("/<h[\d]>(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text);
        $text = preg_replace("/<i>(.*?)<\/i\s*>/is", '/\\1/', $text);
        $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text);
        $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text);
        $text = preg_replace('/<br[^>]*?>/i', "<br>\n", $text);
        $text = preg_replace("/<br[^>]*?\/>/i", "<br\/>\n", $text);
        $text = preg_replace('/<table/i', "\n\n<table", $text);
        $text = strip_tags($text);

        $text = $this->restoreLinks($text);

        // NOTE(review): a second "<a href=...>" to placeholder replacement used
        // to follow here. It ran after strip_tags() had already removed every
        // tag, so it could never match and has been dropped.

        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        $text = preg_replace('/###NL###/', "\n", $text);
        $text = preg_replace("/\n /", "\n", $text);
        $text = preg_replace("/\t/", ' ', $text);

        // Reduce whitespace: collapse every run of spaces to a single space.
        // This is a single pass; looping while the text merely contains a
        // space would never terminate.
        $text = preg_replace('/ {2,}/', ' ', $text);
        while (preg_match("/\n\s*\n\s*\n/", $text)) {
            $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text);
        }

        // wordwrap() requires an int under strict_types; the configured value
        // is presumably a string or null (confirm against ConfigProvider).
        $width = (int) ($this->configProvider->getValue(ConfigOption::WordWrap) ?? self::WORD_WRAP);

        return wordwrap($text, $width);
    }

    /**
     * Replaces every "[URLTEXT]text[ENDURLTEXT][LINK]url[ENDLINK]" placeholder:
     * with the URL alone when the text merely repeats it (with or without an
     * http:// or https:// prefix), with the text alone when the URL is a "#"
     * anchor, and with "text <url>" otherwise.
     */
    private function restoreLinks(string $text): string
    {
        preg_match_all('~\[URLTEXT\](.*)\[ENDURLTEXT\]\[LINK\](.*)\[ENDLINK\]~Umis', $text, $links);

        foreach ($links[0] as $index => $placeholder) {
            $linkText = $links[1][$index];
            $linkUrl = $links[2][$index];
            $trimmedText = trim($linkText);
            $trimmedUrl = trim($linkUrl);

            if (
                $trimmedText === $trimmedUrl
                || 'https://' . $trimmedText === $trimmedUrl
                || 'http://' . $trimmedText === $trimmedUrl
            ) {
                $replacement = $linkUrl;
            } elseif (str_starts_with($linkUrl, '#')) {
                // An anchor-only link has no meaning in plain text.
                $replacement = $linkText;
            } else {
                $replacement = $linkText . ' <' . $linkUrl . '>';
            }

            $text = str_replace($placeholder, $replacement, $text);
        }

        return $text;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CRITICAL: SSL certificate verification is disabled.
Line 139 sets `CURLOPT_SSL_VERIFYPEER` to `false`, which disables SSL certificate verification. This exposes the application to man-in-the-middle (MITM) attacks where an attacker could intercept or tamper with external image downloads.
Remove the option or set it to `true`.
If there are legitimate cases where self-signed certificates need to be supported, make this configurable via a parameter rather than hardcoding `false`.
📝 Committable suggestion
🤖 Prompt for AI Agents