diff --git a/apps/qubit/modules/informationobject/actions/browseAction.class.php b/apps/qubit/modules/informationobject/actions/browseAction.class.php index a64621ee8e..12253763e0 100644 --- a/apps/qubit/modules/informationobject/actions/browseAction.class.php +++ b/apps/qubit/modules/informationobject/actions/browseAction.class.php @@ -244,6 +244,41 @@ public function execute($request) $this->setView($request); + // Determine whether search highlighting is required + $hasQuery = 1 !== preg_match('/^[\s\t\r\n]*$/', $request->query) || 1 !== preg_match('/^[\s\t\r\n]*$/', $request->sq0); + $highlightSetting = QubitSetting::getByName('highlight_search_results'); + $highlightValue = null === $highlightSetting ? 0 : intval($highlightSetting->getValue(['sourceCulture' => true])); + $highlightEnabled = 1 === $highlightValue; + + // Add search term highlighting when any search criteria are present + if ($hasQuery && $highlightEnabled) { + $this->search->query->setHighlight([ + 'pre_tags' => [''], + 'post_tags' => [''], + 'fields' => [ + // This wildcard captures highlighting for all fields. More specific configs + // are set below which override this wildcard + '*' => [ + 'number_of_fragments' => 1, + 'fragment_size' => 150, + ], + // The following fields may be rendered in results. 
Don't fragment in this case + 'i18n.*.scopeAndContent' => [ + 'number_of_fragments' => 0, + 'fragment_size' => 0, + ], + 'i18n.*.title' => [ + 'number_of_fragments' => 0, + 'fragment_size' => 0, + ], + 'creators.i18n.*.authorizedFormOfName' => [ + 'number_of_fragments' => 0, + 'fragment_size' => 0, + ], + ], + ]); + } + $resultSet = QubitSearch::getInstance()->index->getIndex('QubitInformationObject')->search($this->search->getQuery(false, true)); // Page results diff --git a/apps/qubit/modules/search/templates/_searchResult.php b/apps/qubit/modules/search/templates/_searchResult.php index 79c6e01fb9..5639ebfe6f 100644 --- a/apps/qubit/modules/search/templates/_searchResult.php +++ b/apps/qubit/modules/search/templates/_searchResult.php @@ -1,4 +1,55 @@ -getData(); ?> +getData(); + +// The selected language might be different from the user's culture. This is a fallback culture. +$sfCulture = $sf_user->getCulture(); + +// The highlights contain a mapping from search index field name to an array of fragments +// containing the parts of that field that matched the query in the search index. If a field did +// not return results for the query, or highlighting is disabled, the key for that field will not +// exist in this array. +$highlights = $hit->getHighlights(); +$highlights = reset($highlights); +$titleHighlight = get_search_highlight($hit, 'title', ['culture' => $culture]); +$scopeHighlight = get_search_highlight($hit, 'scopeAndContent', ['culture' => $culture]); + +$creatorHighlight = $highlights["creators.i18n.{$culture}.authorizedFormOfName"][0] + ?? $highlights["creators.i18n.{$sfCulture}.authorizedFormOfName"][0] + ?? null; + +$refCodeHighlight = $highlights['referenceCode'][0] ?? null; +$identifierHighlight = $highlights['identifier'][0] ?? 
null; + +// We can render other highlights, but ignore: +// - Identifiers +// - The language filter itself +// - Scope and content, title, creators in other languages +$skippedFieldNames = ['referenceCode', 'identifier', 'i18n.languages']; +$skippedFieldPatterns = [ + '/^i18n\.[^.]+\.title$/', + '/^i18n\.[^.]+\.scopeAndContent$/', + '/^creators\.i18n\.[^.]+\.authorizedFormOfName$/', +]; + +$otherHighlights = array_filter( + $highlights, + function ($key) use ($skippedFieldNames, $skippedFieldPatterns) { + if (in_array($key, $skippedFieldNames, true)) { + return false; + } + foreach ($skippedFieldPatterns as $pattern) { + if (preg_match($pattern, $key)) { + return false; + } + } + + return true; + }, + ARRAY_FILTER_USE_KEY +); + +$maxFragmentSize = 150; +?>
@@ -39,10 +90,10 @@
false, 'culture' => $culture] + ['allowEmpty' => false, 'culture' => $culture, 'highlight' => $titleHighlight], )), ['module' => 'informationobject', 'slug' => $doc['slug']], ['class' => 'h5 mb-0 text-truncate'], @@ -63,10 +114,20 @@ '1' == sfConfig::get('app_inherit_code_informationobject', 1) && isset($doc['referenceCode']) && !empty($doc['referenceCode']) ) { ?> - + + + - + + + @@ -124,28 +185,58 @@ )), ['slug' => $doc['partOf']['slug'], 'module' => 'informationobject'] ); ?> - +
$culture] + ['culture' => $culture, 'highlight' => $scopeHighlight], )) { ?> - + false, 'culture' => $culture, 'cultureFallback' => true, 'highlight' => $creatorHighlight]) ) { ?> - + + + + = $maxFragmentSize; + ?> +
+
+ + +   + + + + + + + +
+
+
diff --git a/apps/qubit/modules/settings/actions/globalAction.class.php b/apps/qubit/modules/settings/actions/globalAction.class.php index f7a783d1d4..28a22e5fef 100644 --- a/apps/qubit/modules/settings/actions/globalAction.class.php +++ b/apps/qubit/modules/settings/actions/globalAction.class.php @@ -78,6 +78,7 @@ protected function populateGlobalForm() $multiRepository = QubitSetting::getByName('multi_repository'); $auditLogEnabled = QubitSetting::getByName('audit_log_enabled'); $showTooltips = QubitSetting::getByName('show_tooltips'); + $highlightSearchResults = QubitSetting::getByName('highlight_search_results'); $defaultPubStatus = QubitSetting::getByName('defaultPubStatus'); $draftNotificationEnabled = QubitSetting::getByName('draft_notification_enabled'); $swordDepositDir = QubitSetting::getByName('sword_deposit_dir'); @@ -103,6 +104,7 @@ protected function populateGlobalForm() 'slug_basis_informationobject' => (isset($slugTypeInformationObject)) ? intval($slugTypeInformationObject->getValue(['sourceCulture' => true])) : QubitSlug::SLUG_BASIS_TITLE, 'permissive_slug_creation' => (isset($permissiveSlugCreation)) ? intval($permissiveSlugCreation->getValue(['sourceCulture' => true])) : QubitSlug::SLUG_RESTRICTIVE, 'show_tooltips' => (isset($showTooltips)) ? intval($showTooltips->getValue(['sourceCulture' => true])) : 1, + 'highlight_search_results' => (isset($highlightSearchResults)) ? intval($highlightSearchResults->getValue(['sourceCulture' => true])) : 1, 'defaultPubStatus' => (isset($defaultPubStatus)) ? $defaultPubStatus->getValue(['sourceCulture' => true]) : QubitTerm::PUBLICATION_STATUS_DRAFT_ID, 'draft_notification_enabled' => (isset($draftNotificationEnabled)) ? intval($draftNotificationEnabled->getValue(['sourceCulture' => true])) : 0, 'sword_deposit_dir' => (isset($swordDepositDir)) ? 
$swordDepositDir->getValue(['sourceCulture' => true]) : null, @@ -243,6 +245,15 @@ protected function updateGlobalSettings() $setting->save(); } + // Highlight search results + if (null !== $highlightSearchResults = $thisForm->getValue('highlight_search_results')) { + $setting = QubitSetting::getByName('highlight_search_results'); + + // Force sourceCulture update to prevent discrepancy in settings between cultures + $setting->setValue($highlightSearchResults, ['sourceCulture' => true]); + $setting->save(); + } + // Default publication status if (null !== $defaultPubStatus = $thisForm->getValue('defaultPubStatus')) { $setting = QubitSetting::getByName('defaultPubStatus'); diff --git a/apps/qubit/modules/settings/templates/globalSuccess.php b/apps/qubit/modules/settings/templates/globalSuccess.php index d4de11e008..96b112f592 100644 --- a/apps/qubit/modules/settings/templates/globalSuccess.php +++ b/apps/qubit/modules/settings/templates/globalSuccess.php @@ -54,6 +54,8 @@ default_repository_browse_view); ?> escape_queries); ?> + + highlight_search_results); ?> diff --git a/data/fixtures/settings.yml b/data/fixtures/settings.yml index 6238884005..b327cc5c23 100644 --- a/data/fixtures/settings.yml +++ b/data/fixtures/settings.yml @@ -1042,6 +1042,9 @@ QubitSetting: QubitSetting_showTooltips: name: show_tooltips value: 1 + QubitSetting_highlightSearchResults: + name: highlight_search_results + value: 1 QubitSetting_accessionMask: name: accession_mask value: '%Y-%m-%d/#i' diff --git a/lib/form/SettingsGlobalForm.class.php b/lib/form/SettingsGlobalForm.class.php index 24bf4bd6ba..700986f2bc 100644 --- a/lib/form/SettingsGlobalForm.class.php +++ b/lib/form/SettingsGlobalForm.class.php @@ -46,6 +46,7 @@ public function configure() 'enable_institutional_scoping' => new sfWidgetFormSelectRadio(['choices' => $options], ['class' => 'radio']), 'audit_log_enabled' => new sfWidgetFormSelectRadio(['choices' => $options], ['class' => 'radio']), 'show_tooltips' => new 
sfWidgetFormSelectRadio(['choices' => $options], ['class' => 'radio']), + 'highlight_search_results' => new sfWidgetFormSelectRadio(['choices' => $options], ['class' => 'radio']), 'slug_basis_informationobject' => $this->getSlugBasisInformationObjectWidget(), 'permissive_slug_creation' => new sfWidgetFormSelectRadio(['choices' => [QubitSlug::SLUG_PERMISSIVE => $this->i18n->__('Yes'), QubitSlug::SLUG_RESTRICTIVE => $this->i18n->__('No')]], ['class' => 'radio']), 'defaultPubStatus' => new sfWidgetFormSelectRadio(['choices' => [QubitTerm::PUBLICATION_STATUS_DRAFT_ID => $this->i18n->__('Draft'), QubitTerm::PUBLICATION_STATUS_PUBLISHED_ID => $this->i18n->__('Published')]], ['class' => 'radio']), @@ -70,6 +71,7 @@ public function configure() 'enable_institutional_scoping' => $this->i18n->__('Enable institutional scoping'), 'audit_log_enabled' => $this->i18n->__('Enable description change logging'), 'show_tooltips' => $this->i18n->__('Show tooltips'), + 'highlight_search_results' => $this->i18n->__('Highlight search results'), 'defaultPubStatus' => $this->i18n->__('Default publication status'), 'draft_notification_enabled' => $this->i18n->__('Show available drafts notification upon user login'), 'sword_deposit_dir' => $this->i18n->__('SWORD deposit directory'), @@ -92,6 +94,7 @@ public function configure() 'separator_character' => $this->i18n->__('The character separating hierarchical elements in a reference code'), 'inherit_code_informationobject' => $this->i18n->__('When set to "yes", the reference code string will be built using the information object identifier plus the identifiers of all its ancestors'), 'escape_queries' => $this->i18n->__('A list of special chars, separated by coma, to be escaped in string queries'), + 'highlight_search_results' => $this->i18n->__('Visually highlight how the search criteria matches the search results'), 'multi_repository' => $this->i18n->__('When set to "no", the repository name is excluded from certain displays because it will be 
too repetitive'), 'enable_institutional_scoping' => $this->i18n->__('Applies to multi-repository sites only. When set to "yes", additional search and browse options will be available at the repository level'), 'defaultPubStatus' => $this->i18n->__('Default publication status for newly created or imported %1%', ['%1%' => sfConfig::get('app_ui_label_informationobject')]), @@ -129,6 +132,7 @@ public function configure() $this->validatorSchema['permissive_slug_creation'] = new sfValidatorInteger(['required' => false]); $this->validatorSchema['audit_log_enabled'] = new sfValidatorInteger(['required' => false]); $this->validatorSchema['show_tooltips'] = new sfValidatorInteger(['required' => false]); + $this->validatorSchema['highlight_search_results'] = new sfValidatorInteger(['required' => false]); $this->validatorSchema['defaultPubStatus'] = new sfValidatorChoice(['choices' => [QubitTerm::PUBLICATION_STATUS_DRAFT_ID, QubitTerm::PUBLICATION_STATUS_PUBLISHED_ID]]); $this->validatorSchema['draft_notification_enabled'] = new sfValidatorInteger(['required' => false]); $this->validatorSchema['sword_deposit_dir'] = new sfValidatorString(['required' => false]); diff --git a/lib/helper/QubitHelper.php b/lib/helper/QubitHelper.php index 54a30e1d4d..b163c13829 100644 --- a/lib/helper/QubitHelper.php +++ b/lib/helper/QubitHelper.php @@ -389,6 +389,92 @@ function render_title($value, $renderMarkdown = true) return ''.sfContext::getInstance()->i18n->__('Untitled').''; } +/** + * Temporarily escape tags meant for highlighting text. These bypass the markdown + * parser's HTML escaping. 
+ * + * @param string $value The string to escape + * + * @return string The escaped string + */ +function escape_marks($value) +{ + $currPos = 0; + + $openingMarks = []; + $closingMarks = []; + + while (($currPos = strpos($value, '', $currPos)) !== false) { + $openingMarks[] = $currPos; + $currPos += strlen(''); + } + + $currPos = 0; + + while (($currPos = strpos($value, '', $currPos)) !== false) { + $closingMarks[] = $currPos; + $currPos += strlen(''); + } + + if (0 === count($openingMarks) && 0 === count($closingMarks)) { + return $value; + } + + if (count($openingMarks) != count($closingMarks)) { + return $value; + } + + $replaced = str_replace('', "\x00MARK_OPEN\x00", $value); + + return str_replace('', "\x00MARK_CLOSE\x00", $replaced); +} + +/** + * Strip mark tags from markdown link URLs so Parsedown can parse the links + * correctly. Placeholders in link text are kept. + * + * @param string $value The string with mark placeholders + * + * @return string The string with placeholders removed from link URLs + */ +function strip_marks_from_link_urls($value) +{ + return preg_replace_callback( + '/\]\(([^)]*)\)/', + function ($matches) { + $url = str_replace('', '', $matches[1]); + $url = str_replace('', '', $url); + + return ']('.$url.')'; + }, + $value + ); +} + +/** + * Replace tags that had previously been escaped. 
+ * + * @param string $value The escaped string + * + * @return string The string with proper tags + */ +function replace_marks($value) +{ + $replaced = str_replace("\x00MARK_OPEN\x00", '', $value); + + return str_replace("\x00MARK_CLOSE\x00", '', $replaced); +} + +function render_title_with_highlights($value, $renderMarkdown = true) +{ + $escaped = strip_marks_from_link_urls($value); + $escaped = escape_marks($escaped); + + $rendered = render_title($escaped, $renderMarkdown); // forward the caller's markdown preference instead of always using render_title's default + + return replace_marks($rendered); +} + function render_value($value) { // Parse using Parsedown's text method in safe mode @@ -405,6 +491,16 @@ function render_value_inline($value) return QubitMarkdown::getInstance()->parse($value, $options); } +function render_value_with_highlights($value) +{ + $escaped = strip_marks_from_link_urls($value); + $escaped = escape_marks($escaped); + + $rendered = render_value_inline($escaped); + + return replace_marks($rendered); +} + function render_value_html($value) { // Parse using Parsedown's text method in unsafe mode @@ -691,12 +787,20 @@ function get_search_i18n($hit, $fieldName, $options = []) $hit = $hit->getData(); // type=sfOutputEscaperArrayDecorator } - $accessField = function ($culture) use ($hit, $fieldName) { + $highlight = $options['highlight'] ?? null; + + $accessField = function ($culture) use ($hit, $fieldName, $highlight) { if (empty($hit['i18n'][$culture][$fieldName])) { return false; } - return $hit['i18n'][$culture][$fieldName]; + if (null !== $highlight && '' !== $highlight) { + $val = $highlight; + } else { + $val = $hit['i18n'][$culture][$fieldName]; + } + + return $val; }; if (isset($options['culture'])) { @@ -726,12 +830,71 @@ function get_search_i18n($hit, $fieldName, $options = []) return $showUntitled(); } -function get_search_creation_details($hit, $culture = null) +/** + * Return the highlight fragment for an i18n field on an Elastica search hit, + * picked from the same culture whose value get_search_i18n would render. 
+ * + * The culture-fallback chain mirrors get_search_i18n(): + * 1. the "culture" option (typically the selected/filtered culture), + * 2. the user/site culture (sf_user->getCulture()), + * 3. when the "cultureFallback" option is true (the default), the + * document's sourceCulture. + * + * @param mixed $hit + * @param mixed $fieldName + * @param mixed $options + */ +function get_search_highlight($hit, $fieldName, $options = []) { - if (!isset($culture)) { - $culture = sfContext::getInstance()->user->getCulture(); + if (empty($hit)) { + return null; + } + + if ( + !($hit instanceof sfOutputEscaperObjectDecorator) + || 'Elastica\Result' != $hit->getClass() + ) { + return null; + } + + $highlights = $hit->getHighlights(); + $highlights = reset($highlights); + + if (empty($highlights)) { + return null; + } + + $data = $hit->getData(); + + $cultureFallback = $options['cultureFallback'] ?? true; + + $hasField = function ($culture) use ($data, $fieldName) { + return !empty($culture) && !empty($data['i18n'][$culture][$fieldName]); + }; + + // Resolve the effective culture using the same priority chain as + // get_search_i18n: requested culture, then sf_culture, then sourceCulture. + $effective = null; + if (isset($options['culture']) && $hasField($options['culture'])) { + $effective = $options['culture']; + } elseif ($hasField($sfCulture = sfContext::getInstance()->user->getCulture())) { + $effective = $sfCulture; + } elseif ($cultureFallback) { + $sourceCulture = $data['sourceCulture'] ?? null; + if ($hasField($sourceCulture)) { + $effective = $sourceCulture; + } + } + + if (null === $effective) { + return null; } + return $highlights["i18n.{$effective}.{$fieldName}"][0] ?? 
null; +} + +function get_search_creation_details($hit, $options = []) +{ if ($hit instanceof sfOutputEscaperObjectDecorator && 'Elastica\Result' == $hit->getClass()) { $hit = $hit->getData(); // type=sfOutputEscaperArrayDecorator } @@ -740,8 +903,9 @@ function get_search_creation_details($hit, $culture = null) // Get creators $creators = $hit['creators']; + if (null !== $creators && 0 < count($creators)) { - $details[] = get_search_i18n($creators[0], 'authorizedFormOfName', ['allowEmpty' => false, 'cultureFallback' => true]); + $details[] = get_search_i18n($creators[0], 'authorizedFormOfName', $options); } // WIP, we are not showing labels for now. See #5202. diff --git a/lib/task/migrate/migrations/arMigration0198.class.php b/lib/task/migrate/migrations/arMigration0198.class.php new file mode 100644 index 0000000000..e31392e7a9 --- /dev/null +++ b/lib/task/migrate/migrations/arMigration0198.class.php @@ -0,0 +1,43 @@ +. + */ + +/* + * Add new setting for search highlighting. + * + * @package AccesstoMemory + * @subpackage migration + */ +class arMigration0198 +{ + public const VERSION = 198; + public const MIN_MILESTONE = 2; + + public function up($configuration) + { + if (null === QubitSetting::getByName('highlight_search_results')) { + $setting = new QubitSetting(); + $setting->name = 'highlight_search_results'; + $setting->editable = 1; + $setting->value = 1; + $setting->save(); + } + + return true; + } +} diff --git a/plugins/arDominionB5Plugin/scss/_layout.scss b/plugins/arDominionB5Plugin/scss/_layout.scss index 1f0a5ab4fb..99d96c8172 100644 --- a/plugins/arDominionB5Plugin/scss/_layout.scss +++ b/plugins/arDominionB5Plugin/scss/_layout.scss @@ -93,6 +93,34 @@ header i.fa-2x { .text-block :last-child { margin-bottom: 0; } + + /* Highlight search result matches */ + mark { + background-color: rgba(var(--bs-primary-rgb), 0.15); + color: inherit; + font-weight: 600; + padding: 0 0.25em; + border-radius: 0.2em; + text-decoration: underline; + 
text-decoration-color: rgba(var(--bs-primary-rgb), 0.85); + text-decoration-thickness: 2px; + } + + /* Ensure marks remain readable when nested in primary-coloured text */ + .text-primary mark { + color: var(--bs-body-color); + } + + .search-highlight-other { + margin-top: 0.5em; + padding: 0.5em; + border-left: solid $gray-600; + background-color: $gray-100; + + .search-highlight-fragment { + font-style: italic; + } + } } } diff --git a/test/phpunit/QubitHelperTest.php b/test/phpunit/QubitHelperTest.php new file mode 100644 index 0000000000..1c5cb61851 --- /dev/null +++ b/test/phpunit/QubitHelperTest.php @@ -0,0 +1,145 @@ +assertEquals($expected, render_value($input)); + } + + public function renderValueHtmlInjectionProvider() + { + return [ + 'script tag' => [ + '', + '

<script>alert("xss")</script>

', + ], + 'img onerror' => [ + '', + '

<img src=x onerror=alert(1)>

', + ], + 'anchor with javascript href' => [ + 'click', + '

<a href="javascript:alert(1)">click</a>

', + ], + 'event handler attribute' => [ + '
hover
', + '

<div onmouseover="alert(1)">hover</div>

', + ], + 'nested script in mark' => [ + '', + '

<mark><script>alert(1)</script></mark>

', + ], + 'iframe' => [ + '', + '

<iframe src="https://example.com"></iframe>

', + ], + 'plain text unchanged' => [ + 'Hello world', + '

Hello world

', + ], + ]; + } + + /** + * @dataProvider renderValueWithHighlightsHtmlInjectionProvider + * + * @param mixed $input + * @param mixed $expected + */ + public function testRenderValueWithHighlightsEscapesHtml($input, $expected) + { + $this->assertEquals($expected, render_value_with_highlights($input)); + } + + public function renderValueWithHighlightsHtmlInjectionProvider() + { + return [ + 'mark tags are preserved' => [ + 'highlighted', + 'highlighted', + ], + 'script tag is escaped' => [ + '', + '<script>alert("xss")</script>', + ], + 'script inside mark is escaped' => [ + '', + '<script>alert(1)</script>', + ], + 'img onerror is escaped' => [ + '', + '<img src=x onerror=alert(1)>', + ], + 'anchor with javascript href is escaped' => [ + 'click', + '<a href="javascript:alert(1)">click</a>', + ], + 'event handler attribute is escaped' => [ + '
hover
', + '<div onmouseover="alert(1)">hover</div>', + ], + 'iframe is escaped' => [ + '', + '<iframe src="https://example.com"></iframe>', + ], + 'fake mark with attributes is escaped' => [ + 'text', + '<mark onmouseover="alert(1)">text</mark>', + ], + 'mark with extra spaces is escaped' => [ + 'text', + '<mark >text</mark >', + ], + 'orphaned closing mark is escaped' => [ + 'text', + 'text</mark>', + ], + 'orphaned opening mark is escaped' => [ + 'text', + '<mark>text', + ], + 'unbalanced marks' => [ + 'firstsecond', + '<mark>first</mark><mark>second', + ], + 'markdown link with marks in text and url' => [ + '[slug](/index.php/slug)', + 'slug', + ], + 'markdown link with external url' => [ + '[slug](https://example.com/slug)', + 'slug', + ], + 'markdown link with relative url' => [ + '[slug](/slug)', + 'slug', + ], + 'mixed marks and other html' => [ + 'safealso safe', + 'safe<script>alert(1)</script>also safe', + ], + 'plain text unchanged' => [ + 'Hello world', + 'Hello world', + ], + 'multiple marks preserved' => [ + 'Some first and second highlights', + 'Some first and second highlights', + ], + ]; + } +}