/classes/ezfezpsolrquerybuilder.php
PHP | 1929 lines | 1264 code | 214 blank | 451 comment | 174 complexity | 8dd80181eee95da8bbef591df003fea7 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, Apache-2.0
Large files are truncated, but you can click here to view the full file
- <?php
- //
- //
- // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
- // SOFTWARE NAME: eZ Find
- // SOFTWARE RELEASE: 1.0.x
- // COPYRIGHT NOTICE: Copyright (C) 1999-2013 eZ Systems AS
- // SOFTWARE LICENSE: GNU General Public License v2.0
- // NOTICE: >
- // This program is free software; you can redistribute it and/or
- // modify it under the terms of version 2.0 of the GNU General
- // Public License as published by the Free Software Foundation.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of version 2.0 of the GNU General
- // Public License along with this program; if not, write to the Free
- // Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- // MA 02110-1301, USA.
- //
- //
- // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
- //
- /*! \file ezfezpsolrquerybuilder.php
- */
- /*!
- \class ezfeZPSolrQueryBuilder ezfezpsolrquerybuilder.php
- \brief The class ezfeZPSolrQueryBuilder builds the Solr query parameter arrays used by eZ Find (search and More Like This requests)
- */
- class ezfeZPSolrQueryBuilder
- {
/**
 * Constructor
 *
 * Sets variables for creating a new instance of ezfeZPSolrQueryBuilder
 *
 * @param Object $searchPluginInstance Search engine instance. Allows the query builder to
 *                                     communicate with the caller ( eZSolr instance ).
 */
public function __construct( $searchPluginInstance )
{
    $this->searchPluginInstance = $searchPluginInstance;
}

/**
 * Legacy constructor name (method named after the class).
 *
 * PHP 8 no longer treats a method named after its class as a constructor,
 * so the real initialisation lives in __construct(). This alias only
 * forwards to it, so that code calling the method explicitly by its old
 * name keeps working.
 *
 * @deprecated Use __construct() instead.
 * @param Object $searchPluginInstance Search engine instance.
 */
public function ezfeZPSolrQueryBuilder( $searchPluginInstance )
{
    $this->__construct( $searchPluginInstance );
}
/**
 * Builds a query spanning several Solr fields, basically doing the same
 * as a Lucene MultiField query. One clause is emitted per field and the
 * clauses are separated by a space, implying an OR functionality.
 * The search text is inserted verbatim into every clause: not always safe.
 *
 * @since eZ Find 2.0
 * @param string $searchText Text to look for in each field
 * @param array $solrFields Names of the Solr fields to query
 * @param array $boostFields Boost definitions: either a hash
 *                           ( <base name> => <boost> ) or list entries
 *                           written as '<base name>:<boost>'
 *
 * @return string One 'field:(text)[^boost] ' clause per entry of $solrFields
 */
public function buildMultiFieldQuery( $searchText, $solrFields = array(), $boostFields = array() )
{
    // Map: internal Solr field name => numeric boost factor
    $boostByField = array();
    foreach ( $boostFields as $name => $factor )
    {
        // List-style entry 'name:boost': cut at the first colon only, leave the rest intact
        if ( strpos( $factor, ':' ) !== false && is_numeric( $name ) )
        {
            $pieces = explode( ':', $factor, 2 );
            $name = $pieces[0];
            $factor = $pieces[1];
        }

        // Entries without a numeric boost are silently dropped
        if ( !is_numeric( $factor ) )
        {
            continue;
        }

        // Boosts are looked up by internal field name
        $boostByField[eZSolr::getFieldName( $name )] = $factor;
    }

    $query = '';
    foreach ( $solrFields as $solrField )
    {
        $clause = $solrField . ':(' . $searchText . ')';
        if ( array_key_exists( $solrField, $boostByField ) )
        {
            $clause .= '^' . $boostByField[$solrField];
        }

        // don't mind the last extra space, it's ignored by Solr
        $query .= $clause . ' ';
    }

    return $query;
}
/**
 * Builds the parameter array for a search request on the Solr search server.
 *
 * @param string $searchText Search term
 * @param array $params Search parameters.
 *      Example:
 * <code>
 * array( 'SearchOffset' => <offset>,
 *        'SearchLimit' => <limit>,
 *        'SearchSubTreeArray' => array( <node ID1>[, <node ID2>]... ),
 *        'SearchContentClassID' => array( <class ID1>[, <class ID2>]... ),
 *        'SearchContentClassAttributeID' => <class attribute ID>,
 *        'Facet' => array( array( 'field' => <class identifier>/<attribute identifier>[/<option>], ... ) ),
 *        'Filter' => array( <base_name> => <value>, <base_name2> => <value2> ),
 *        'SortBy' => array( <field> => <asc|desc> [, <field2> => <asc|desc> [,...]] ) |
 *                    array( array( <field> => <asc|desc> )[, array( <field2> => <asc|desc> )[,...]] ),
 *        'BoostFunctions' => array( 'fields' => array(
 *                                       'article/title' => 2,
 *                                       'modified:5'
 *                                   ),
 *                                   'functions' => array( 'rord(meta_modified_dt)^10' )
 *                                 ),
 *        'ForceElevation' => false,
 *        'EnableElevation' => true,
 *        'DistributedSearch' => array( 'shards' => array( 'shard1', 'shard2', ... ),
 *                                      'searchfields' => array( 'myfield1', 'myfield2', ... ),
 *                                      'returnfields' => array( 'myfield1', 'myfield2', ... ),
 *                                      'rawfilterlist' => array( 'foreignfield:a', '(foreignfield:b AND otherfield:c)', ... )
 *                                    )
 *      );
 * </code>
 * For full facet description, see facets design document.
 * For full description about 'ForceElevation', see elevate support design document ( elevate_support.rst.txt )
 *
 * The 'rawfilterlist' in distributed search is appended to the other filters with an 'OR' for each entry,
 * as the policy list will in general not be applicable to foreign indexes. To be used with care!
 * The 'shards' entry of 'DistributedSearch' is not consumed by this method.
 *
 * @param array $searchTypes Search types. Reserved.
 *
 * @return array Solr query parameters.
 *
 * @see ezfeZPSolrQueryBuilder::buildBoostFunctions()
 */
public function buildSearch( $searchText, $params = array(), $searchTypes = array() )
{
    eZDebugSetting::writeDebug( 'extension-ezfind-query', $params, 'search params' );

    $offset = ( isset( $params['SearchOffset'] ) && $params['SearchOffset'] ) ? $params['SearchOffset'] : 0;
    $limit = ( isset( $params['SearchLimit'] ) && $params['SearchLimit'] ) ? $params['SearchLimit'] : 10;
    $subtrees = isset( $params['SearchSubTreeArray'] ) ? $params['SearchSubTreeArray'] : array();
    $contentClassID = ( isset( $params['SearchContentClassID'] ) && $params['SearchContentClassID'] <> -1 ) ? $params['SearchContentClassID'] : false;
    $contentClassAttributeID = ( isset( $params['SearchContentClassAttributeID'] ) && $params['SearchContentClassAttributeID'] <> -1 ) ? $params['SearchContentClassAttributeID'] : false;
    $sectionID = isset( $params['SearchSectionID'] ) && $params['SearchSectionID'] > 0 ? $params['SearchSectionID'] : false;
    $dateFilter = isset( $params['SearchDate'] ) && $params['SearchDate'] > 0 ? $params['SearchDate'] : false;
    $asObjects = isset( $params['AsObjects'] ) ? $params['AsObjects'] : true;
    $spellCheck = isset( $params['SpellCheck'] ) && $params['SpellCheck'] > 0 ? $params['SpellCheck'] : array();
    $queryHandler = isset( $params['QueryHandler'] ) ? $params['QueryHandler'] : self::$FindINI->variable( 'SearchHandler', 'DefaultSearchHandler' );

    // eZFInd 2.3: check ini setting and take it as a default instead of false
    $visibilityDefaultSetting = self::$SiteINI->variable( 'SiteAccessSettings', 'ShowHiddenNodes' );
    $visibilityDefault = ( $visibilityDefaultSetting === 'true' );
    $ignoreVisibility = isset( $params['IgnoreVisibility'] ) ? $params['IgnoreVisibility'] : $visibilityDefault;
    $this->searchPluginInstance->postSearchProcessingData['ignore_visibility'] = $ignoreVisibility;

    $limitation = isset( $params['Limitation'] ) ? $params['Limitation'] : null;
    $boostFunctions = isset( $params['BoostFunctions'] ) ? $params['BoostFunctions'] : null;
    $forceElevation = isset( $params['ForceElevation'] ) ? $params['ForceElevation'] : false;
    $enableElevation = isset( $params['EnableElevation'] ) ? $params['EnableElevation'] : true;
    $distributedSearch = isset( $params['DistributedSearch'] ) ? $params['DistributedSearch'] : false;
    $fieldsToReturn = isset( $params['FieldsToReturn'] ) ? $params['FieldsToReturn'] : array();
    $searchResultClusterParams = isset( $params['SearchResultClustering'] ) ? $params['SearchResultClustering'] : array();
    $extendedAttributeFilter = isset( $params['ExtendedAttributeFilter'] ) ? $params['ExtendedAttributeFilter'] : array();

    // distributed search option
    // @since ezfind 2.2
    // Only the extra search fields, extra return fields and raw filters
    // influence the parameters built here.
    $extraFieldsToSearch = isset( $distributedSearch['searchfields'] ) ? $distributedSearch['searchfields'] : array();
    $extraFieldsToReturn = isset( $distributedSearch['returnfields'] ) ? $distributedSearch['returnfields'] : array();
    $shardFilterQuery = isset( $distributedSearch['rawfilterlist'] ) ? $distributedSearch['rawfilterlist'] : array();

    // check if filter parameter is indeed an array, and set it otherwise
    if ( isset( $params['Filter'] ) && !is_array( $params['Filter'] ) )
    {
        $params['Filter'] = array( $params['Filter'] );
    }

    $filterQuery = array();

    // Add subtree query filter
    if ( !empty( $subtrees ) )
    {
        $this->searchPluginInstance->postSearchProcessingData['subtree_array'] = $subtrees;
        $subtreeQueryParts = array();
        foreach ( $subtrees as $subtreeNodeID )
        {
            $subtreeQueryParts[] = eZSolr::getMetaFieldName( 'path' ) . ':' . $subtreeNodeID;
        }
        $filterQuery[] = implode( ' OR ', $subtreeQueryParts );
    }

    // Add policy limitation query filter
    $policyLimitationFilterQuery = $this->policyLimitationFilterQuery( $limitation, $ignoreVisibility );
    if ( $policyLimitationFilterQuery !== false )
    {
        $filterQuery[] = $policyLimitationFilterQuery;
    }

    // Add time/date query filter
    if ( $dateFilter > 0 )
    {
        $searchTimestamp = false;
        switch ( $dateFilter )
        {
            // last day
            case 1:
                $searchTimestamp = strtotime( '-1 day' );
                break;
            // last week
            case 2:
                $searchTimestamp = strtotime( '-1 week' );
                break;
            // last month
            case 3:
                $searchTimestamp = strtotime( '-1 month' );
                break;
            // last three month
            case 4:
                $searchTimestamp = strtotime( '-3 month' );
                break;
            // last year
            case 5:
                $searchTimestamp = strtotime( '-1 year' );
                break;
        }

        // An unrecognised SearchDate value adds no filter at all, rather
        // than building a range from an undefined timestamp.
        if ( $searchTimestamp !== false )
        {
            $filterQuery[] = eZSolr::getMetaFieldName( 'published' ) . ':[' . ezfSolrDocumentFieldBase::preProcessValue( $searchTimestamp, 'date' ) .'/DAY TO *]';
        }
    }

    if ( ( !eZContentObjectTreeNode::showInvisibleNodes() || !$ignoreVisibility ) && ( self::$FindINI->variable( 'SearchFilters', 'FilterHiddenFromDB' ) == 'enabled' ) )
    {
        $db = eZDB::instance();
        $invisibleNodeIDArray = $db->arrayQuery( 'SELECT node_id FROM ezcontentobject_tree WHERE ezcontentobject_tree.is_invisible = 1', array( 'column' => 0 ) );

        // only add filter if there are hidden nodes after all
        if ( $invisibleNodeIDArray )
        {
            $hiddenNodesQueryText = 'meta_main_node_id_si:[* TO *] -meta_main_node_id_si:(';
            foreach ( $invisibleNodeIDArray as $element )
            {
                $hiddenNodesQueryText .= $element['node_id'] . ' ';
            }
            $hiddenNodesQueryText .= ')';
            $filterQuery[] = $hiddenNodesQueryText;
        }
    }

    // Add content class query filter
    $classLimitationFilter = $this->getContentClassFilterQuery( $contentClassID );
    if ( $classLimitationFilter !== null )
    {
        $filterQuery[] = $classLimitationFilter;
    }

    // Add section to query filter.
    if ( $sectionID )
    {
        $filterQuery[] = eZSolr::getMetaFieldName( 'section_id' ) . ':' . $sectionID;
    }

    $languageFilterQuery = $this->buildLanguageFilterQuery();
    if ( $languageFilterQuery )
    {
        $filterQuery[] = $languageFilterQuery;
    }

    $paramFilterQuery = $this->getParamFilterQuery( $params );
    if ( $paramFilterQuery )
    {
        $filterQuery[] = $paramFilterQuery;
    }

    // add raw filters
    if ( self::$FindINI->hasVariable( 'SearchFilters', 'RawFilterList' ) )
    {
        $rawFilters = self::$FindINI->variable( 'SearchFilters', 'RawFilterList' );
        if ( is_array( $rawFilters ) )
        {
            $filterQuery = array_merge( $filterQuery, $rawFilters );
        }
    }

    // Build and get facet query parameters.
    $facetQueryParamList = $this->buildFacetQueryParamList( $params );

    // search only text type declared fields
    $fieldTypeExcludeList = $this->fieldTypeExludeList( NULL );

    // Create sort parameters based on the parameters.
    $sortParameter = $this->buildSortParameter( $params );

    // the array_unique below is necessary because attribute identifiers are not unique .. and we get as
    // much highlight snippets as there are duplicate attribute identifiers
    // these are also in the list of query fields (dismax, ezpublish) request handlers
    $queryFields = array_unique( $this->getClassAttributes( $contentClassID, $contentClassAttributeID, $fieldTypeExcludeList ) );

    // highlighting only in the attributes, otherwise the object name is repeated in the highlight, which is already
    // partly true as it is mostly composed of one or more attributes.
    // maybe we should add meta data to the index to filter them out.
    $highLightFields = $queryFields;

    // @since eZ Find 2.3
    // when dedicated attributes are searched for, don't add meta-fields to the $queryfields list
    if ( !$contentClassAttributeID )
    {
        $queryFields[] = eZSolr::getMetaFieldName( 'name' );
        $queryFields[] = eZSolr::getMetaFieldName( 'owner_name' );
    }

    $spellCheckParamList = array();
    // @param $spellCheck expects array (true|false, dictionary identifier, ...)
    // NOTE(review): the second alternative turns spell checking on when the INI
    // setting is 'enabled' AND the caller explicitly passed a false flag; when no
    // flag is passed the INI setting is not applied. This looks unintended
    // (possibly meant !isset( $spellCheck[0] )) - confirm before changing.
    if ( ( isset( $spellCheck[0] ) and $spellCheck[0] ) or
         ( self::$FindINI->variable( 'SpellCheck', 'SpellCheck' ) == 'enabled' and ( isset( $spellCheck[0] ) and !$spellCheck[0] ) ) )
    {
        $dictionary = isset( $spellCheck[1] ) ? $spellCheck[1] : self::$FindINI->variable( 'SpellCheck', 'DefaultDictionary' );
        $spellCheckParamList = array(
            'spellcheck' => 'true',
            // q is manipulated in case of standard request handler, so make it explicit by using spellcheck.q
            'spellcheck.q' => $searchText,
            'spellcheck.dictionary' => $dictionary,
            'spellcheck.collate' => 'true',
            'spellcheck.extendedResults' => 'true',
            'spellcheck.onlyMorePopular' => 'true',
            'spellcheck.count' => 1 );
    }

    // Create the Elevate-related parameters here :
    $elevateParamList = eZFindElevateConfiguration::getRuntimeQueryParameters( $forceElevation, $enableElevation, $searchText );

    // process query handler: standard, simplestandard, ezpublish, heuristic
    // first determine which implemented handler to use when heuristic is specified
    if ( strtolower( $queryHandler ) === 'heuristic' )
    {
        // @todo: this code will evolve of course
        if ( preg_match( '/[\^\*\~]|AND|OR/', $searchText ) > 0 )
        {
            $queryHandler = 'simplestandard';
        }
        else
        {
            $queryHandler = 'ezpublish';
        }
    }

    $handlerParameters = array();
    $queryHandler = strtolower( $queryHandler );

    switch ( $queryHandler )
    {
        case 'standard':
            // @todo: this is more complicated
            // build the query against all "text" like fields
            // should take into account all the filter fields and class filters to shorten the query
            // need to build: Solr q
            // $boostFunctions is null when the caller passed no 'BoostFunctions',
            // so it must be checked before being inspected as an array.
            $fieldBoosts = ( is_array( $boostFunctions ) && isset( $boostFunctions['fields'] ) ) ? $boostFunctions['fields'] : array();
            $handlerParameters = array( 'q' => $this->buildMultiFieldQuery( $searchText, array_merge( $queryFields, $extraFieldsToSearch ), $fieldBoosts ),
                                        'qt' => 'standard' );
            break;

        case 'simplestandard':
            // not to do much, searching is against the default aggregated field
            // only highlightfields
            $highLightFields = array( 'ezf_df_text' );
            $handlerParameters = array( 'q' => $searchText,
                                        'qt' => 'standard',
                                        'hl.usePhraseHighlighter' => 'true',
                                        'hl.highlightMultiTerm' => 'true' );
            break;

        case 'ezpublish':
            // the dismax based handler, just keywords input, most useful for ordinary queries by users
            // need to build: Solr q, qf, dismax specific parameters
            // (deliberate fall-through to the default handling)
        default:
            // ezpublish of course, this to not break BC and is the most "general"
            // if another value is specified, it is supposed to be a dismax like handler
            // with possible other tuning variables then the stock provided 'ezpublish' in solrconfig.xml
            // remark it should be lowercase in solrconfig.xml!
            $boostQueryString = $this->boostQuery();
            $rawBoostQueries = self::$FindINI->variable( 'QueryBoost', 'RawBoostQueries' );
            if ( is_array( $rawBoostQueries ) && !empty( $rawBoostQueries ) )
            {
                $boostQueryString .= ' ' . implode( ' ', $rawBoostQueries );
            }
            $handlerParameters = array( 'q' => $searchText,
                                        'bq' => $boostQueryString,
                                        'qf' => implode( ' ', array_merge( $queryFields, $extraFieldsToSearch ) ),
                                        'qt' => $queryHandler );
    }

    // Handle boost functions :
    $boostFunctionsParamList = $this->buildBoostFunctions( $boostFunctions, $handlerParameters );

    // special handling of filters in the case of distributed search filters
    // incorporate distributed search filters if defined with an OR expression, and AND-ing all others
    // need to do this as multiple fq elements are otherwise AND-ed by the Solr backend
    // when using this to search across a dedicated set of languages, it will still be valid with the ezp permission
    // scheme
    // With no local filter there is nothing to OR the shard filters against,
    // and combining them would produce an empty '(())' group, so the filter
    // list is left untouched in that case.
    if ( !empty( $shardFilterQuery ) && !empty( $filterQuery ) )
    {
        $fqString = '((' . implode( ') AND (', $filterQuery ) . ')) OR ((' . implode( ') OR (', $shardFilterQuery ) . '))';
        // modify the filterQuery array with this single string as the only element
        $filterQuery = array( $fqString );
    }

    $fieldsToReturnString = eZSolr::getMetaFieldName( 'guid' ) . ' ' . eZSolr::getMetaFieldName( 'installation_id' ) . ' ' .
            eZSolr::getMetaFieldName( 'main_url_alias' ) . ' ' . eZSolr::getMetaFieldName( 'installation_url' ) . ' ' .
            eZSolr::getMetaFieldName( 'id' ) . ' ' . eZSolr::getMetaFieldName( 'main_node_id' ) . ' ' .
            eZSolr::getMetaFieldName( 'language_code' ) . ' ' . eZSolr::getMetaFieldName( 'name' ) .
            ' score ' . eZSolr::getMetaFieldName( 'published' ) . ' ' . eZSolr::getMetaFieldName( 'path_string' ) . ' ' .
            eZSolr::getMetaFieldName( 'main_path_string' ) . ' ' . eZSolr::getMetaFieldName( 'is_invisible' ) . ' ' .
            implode( ' ', $extraFieldsToReturn );

    if ( !$asObjects )
    {
        if ( empty( $fieldsToReturn ) )
        {
            // @todo: needs to be refined with Solr supporting globbing in fl argument, otherwise requests will be to heavy for large fields as for example binary file content
            $fieldsToReturnString = 'score, *';
        }
        else
        {
            $fieldsToReturnString .= ' ' . implode( ' ', $fieldsToReturn );
        }
    }

    $searchResultClusterParamList = $this->buildSearchResultClusterQuery( $searchResultClusterParams );
    eZDebugSetting::writeDebug( 'extension-ezfind-query', $searchResultClusterParamList, 'Cluster params' );

    $queryParams = array_merge(
        $handlerParameters,
        array(
            'start' => $offset,
            'rows' => $limit,
            'sort' => $sortParameter,
            'indent' => 'on',
            'version' => '2.2',
            'fl' => $fieldsToReturnString,
            'fq' => $filterQuery,
            'hl' => self::$FindINI->variable( 'HighLighting', 'Enabled' ),
            'hl.fl' => implode( ' ', $highLightFields ),
            'hl.snippets' => self::$FindINI->variable( 'HighLighting', 'SnippetsPerField' ),
            'hl.fragsize' => self::$FindINI->variable( 'HighLighting', 'FragmentSize' ),
            'hl.requireFieldMatch' => self::$FindINI->variable( 'HighLighting', 'RequireFieldMatch' ),
            'hl.simple.pre' => self::$FindINI->variable( 'HighLighting', 'SimplePre' ),
            'hl.simple.post' => self::$FindINI->variable( 'HighLighting', 'SimplePost' ),
            'wt' => 'php'
        ),
        $facetQueryParamList,
        $spellCheckParamList,
        $boostFunctionsParamList,
        $elevateParamList,
        $searchResultClusterParamList
    );

    if ( isset( $extendedAttributeFilter['id'] ) && isset( $extendedAttributeFilter['params'] ) )
    {
        // single filter: wrap it so it is handled like a list of filters
        $extendedAttributeFilter = array( $extendedAttributeFilter );
    }

    // Each extended attribute filter receives the current parameters and
    // returns the (possibly modified) parameters.
    foreach ( $extendedAttributeFilter as $filterDefinition )
    {
        if ( isset( $filterDefinition['id'] ) )
        {
            $filter = eZFindExtendedAttributeFilterFactory::getInstance( $filterDefinition['id'] );
            if ( $filter )
            {
                $filterParams = isset( $filterDefinition['params'] ) ? $filterDefinition['params'] : array();
                $queryParams = $filter->filterQueryParams( $queryParams, $filterParams );
            }
        }
    }

    return $queryParams;
}
/**
 * @since eZ Find 2.1
 *
 * Language filtering.
 * This method builds the language filter, depending on the following settings :
 *
 * In site.ini :
 * <code>
 * # Prioritized list of languages. Only translations in these
 * # languages will be shown
 *
 * [RegionalSettings]
 * SiteLanguageList[]
 * SiteLanguageList[]=eng-GB
 * SiteLanguageList[]=fre-FR
 * </code>
 *
 * And in ezfind.ini :
 * <code>
 * [LanguageSearch]
 * SearchMainLanguageOnly=enabled
 * </code>
 *
 * When SearchMainLanguageOnly is set to 'enabled', only results in the first language in SiteLanguageList[] will be returned.
 * When SearchMainLanguageOnly is set to 'disabled', searching will be done across all possible translations defined in
 * SiteLanguageList[] (unless ShowUntranslatedObjects is enabled, in this case no language filtering will be done at all)
 *
 * When SiteLanguageList[] is empty, no language filter is built.
 *
 * @return string The correct language filtering string, appended to the 'fq' parameter in the Solr request.
 *                Empty string when no language filtering applies.
 */
protected function buildLanguageFilterQuery()
{
    $ini = eZINI::instance();
    $languages = $ini->variable( 'RegionalSettings', 'SiteLanguageList' );

    // Without at least one language, the clauses below would read a missing
    // first element or produce an empty '()' group.
    if ( !is_array( $languages ) || empty( $languages ) )
    {
        return '';
    }

    $searchMainLanguageOnly = self::$FindINI->variable( 'LanguageSearch', 'SearchMainLanguageOnly' ) == 'enabled';
    $showUntranslatedObjects = $ini->variable( 'RegionalSettings', 'ShowUntranslatedObjects' ) == 'enabled';
    $languageCodeMetaName = eZSolr::getMetaFieldName( 'language_code' );

    if ( $searchMainLanguageOnly )
    {
        // first entry of the prioritized list, whatever its array key is
        return $languageCodeMetaName . ':' . reset( $languages );
    }

    if ( $showUntranslatedObjects === false )
    {
        // any of the site languages, or content flagged as always available
        return $languageCodeMetaName . ':(' . implode( ' OR ', $languages ) . ')'
             . " OR ( " . eZSolr::getMetaFieldName( 'always_available' ) . ':true )';
    }

    return '';
}
/**
 * @since eZ Find 2.0
 *
 * Boost Functions support.
 * "Allows one to use the actual value of a numeric field and functions of those fields in a relevancy score."
 *
 * @see http://wiki.apache.org/solr/FunctionQuery
 * @param array|null $boostFunctions Example :
 * <code>
 * $boostFunctions = array( 'fields' => array(
 *                              'article/title' => 2,
 *                              'modified:5'
 *                          ),
 *                          'functions' => array( 'rord(meta_modified_dt)^10' )
 *                        );
 * </code>
 *                      The keys 'mfunctions' ( multiplicative boost functions ) and 'queries'
 *                      ( boost queries ) are also read when the 'ezpublish' handler is used.
 *                      The former '= null' default was dropped: it could never be used,
 *                      since the following parameter is required.
 * @param array &$handlerParameters The inclusion of boost functions in the final search parameter array depends on which queryHandler is used.
 *                      For 'ezpublish' the 'qf' and 'bq' entries are modified, for any other handler
 *                      the boost functions are appended to the 'q' entry.
 *
 * @return array containing the boost expressions for the various request handler boost parameters
 */
protected function buildBoostFunctions( $boostFunctions, &$handlerParameters )
{
    if ( $boostFunctions == null )
        return array();

    // Field boosts and functions seems to be mutually exclusive.
    $processedBoostFunctions = array( 'fields' => array(), 'functions' => array() );

    // Process simple query-time field boosting first :
    if ( array_key_exists( 'fields', $boostFunctions ) )
    {
        foreach ( $boostFunctions['fields'] as $baseName => $boostValue )
        {
            if ( strpos( $boostValue, ':' ) !== false && is_numeric( $baseName ) )
            {
                // split at the first colon, leave the rest intact
                list( $baseName, $boostValue ) = explode( ':', $boostValue, 2 );
            }

            if ( is_numeric( $boostValue ) )
            {
                // Get internal field name.
                $baseName = eZSolr::getFieldName( $baseName );
                // Keyed by the internal field name: that name is what gets
                // looked up in the 'qf' list further down.
                $processedBoostFunctions['fields'][$baseName] = $baseName . '^' . $boostValue;
            }
        }
    }

    // Then the boost function expressions :
    if ( array_key_exists( 'functions', $boostFunctions ) )
    {
        foreach ( $boostFunctions['functions'] as $expression )
        {
            // @TODO : parse $expression. use an ezi18n-like system ( formats ), meaning that the $boostFunctions['functions'] will look like this :
            /* <code>
             * array( 'product( pow( %rating, 5 ), %modified )' => array( '%rating' => 'article/rating',
             *                                                            '%modified' => 'modified' )
             *      );
             * </code>
             *
             * Eventually, one single expression is to be accepted here, as is the case in Solr.
             */
            $processedBoostFunctions['functions'][] = $expression;
        }
    }

    switch ( $handlerParameters['qt'] )
    {
        case 'ezpublish' :
        {
            // The edismax based handler which takes its own boost parameters
            // Push the boost expression in the 'bf' parameter, if it is not empty.
            //
            // for the fields to boost, modify the qf parameter for edismax
            // this is set before in the buildSearch method
            $queryFields = explode( ' ', $handlerParameters['qf'] );
            foreach ( $processedBoostFunctions['fields'] as $fieldToBoost => $boostExpression )
            {
                // strict search: a loose comparison would let unrelated
                // values compare equal to a field name
                $key = array_search( (string)$fieldToBoost, $queryFields, true );
                if ( false !== $key )
                {
                    $queryFields[$key] = $boostExpression;
                }
                // might be a custom created field, lets add it implicitely with its boost specification
                else
                {
                    $queryFields[] = $boostExpression;
                }
            }
            $handlerParameters['qf'] = implode( ' ', $queryFields );

            $boostReturnArray = array();
            // additive boost functions
            if ( array_key_exists( 'functions', $boostFunctions ) )
            {
                $boostReturnArray['bf'] = $boostFunctions['functions'];
            }
            // multiplicative boost functions
            if ( array_key_exists( 'mfunctions', $boostFunctions ) )
            {
                $boostReturnArray['boost'] = $boostFunctions['mfunctions'];
            }
            // add the queries to the existing bq edismax parameter
            if ( array_key_exists( 'queries', $boostFunctions ) )
            {
                $handlerParameters['bq'] .= ' ' . implode( ' ', $boostFunctions['queries'] );
            }

            return $boostReturnArray;
        }

        default:
        {
            // Simplestandard or standard search handlers.
            // Append the boost expression to the 'q' parameter.
            // Alter the $handlerParameters array ( passed as reference )
            // @TODO : Handle query-time field boosting through the buildMultiFieldQuery() method.
            //         Requires a modified 'heuristic' mode.
            $boostString = trim( implode( ' ', $processedBoostFunctions['functions'] ) );
            // Without any function there is nothing to append; a bare
            // ' _val_:' would only corrupt the query.
            if ( $boostString !== '' )
            {
                $handlerParameters['q'] .= ' _val_:' . $boostString;
            }
        } break;
    }

    return array();
}
/**
 * @since eZ Find 2.0
 *
 * Builds the parameter array for a More Like This similarity search.
 *
 * @param string $queryType How $query is to be interpreted ( case-insensitive ):
 *                          'nid' : node id, 'oid' : object id, 'url' : URL passed as 'stream.url',
 *                          'text' or any other value : text passed as 'stream.body'
 * @param string $query The node id, object id, URL or text, depending on $queryType
 * @param array $params Parameters read here: 'QueryInstallationID', 'SearchOffset', 'SearchLimit',
 *                      'SearchSubTreeArray', 'SearchContentClassID', 'SearchSectionID'.
 *                      The whole array is also handed to the filter, facet and sort builders.
 *
 * @return array Solr query parameters
 */
public function buildMoreLikeThis( $queryType, $query, $params = array() )
{
    eZDebugSetting::writeDebug( 'extension-ezfind-query-mlt', $queryType, 'mlt querytype' );
    eZDebugSetting::writeDebug( 'extension-ezfind-query-mlt', $query, 'mlt query' );
    eZDebugSetting::writeDebug( 'extension-ezfind-query-mlt', $params, 'mlt params' );

    $queryInstallationID = ( isset( $params['QueryInstallationID'] ) && $params['QueryInstallationID'] ) ? $params['QueryInstallationID'] : eZSolr::installationID();
    $offset = ( isset( $params['SearchOffset'] ) && $params['SearchOffset'] ) ? $params['SearchOffset'] : 0;
    $limit = ( isset( $params['SearchLimit'] ) && $params['SearchLimit'] ) ? $params['SearchLimit'] : 10;
    $subtrees = isset( $params['SearchSubTreeArray'] ) ? $params['SearchSubTreeArray'] : array();
    $contentClassID = ( isset( $params['SearchContentClassID'] ) && $params['SearchContentClassID'] <> -1 ) ? $params['SearchContentClassID'] : false;
    $sectionID = isset( $params['SearchSectionID'] ) && $params['SearchSectionID'] > 0 ? $params['SearchSectionID'] : false;

    $filterQuery = array();

    // Add subtree query filter
    if ( !empty( $subtrees ) )
    {
        $subtreeQueryParts = array();
        foreach ( $subtrees as $subtreeNodeID )
        {
            $subtreeQueryParts[] = eZSolr::getMetaFieldName( 'path' ) . ':' . $subtreeNodeID;
        }
        $filterQuery[] = implode( ' OR ', $subtreeQueryParts );
    }

    // Add policy limitation query filter
    $policyLimitationFilterQuery = $this->policyLimitationFilterQuery();
    if ( $policyLimitationFilterQuery !== false )
    {
        $filterQuery[] = $policyLimitationFilterQuery;
    }

    // Add content class query filter
    $classLimitationFilter = $this->getContentClassFilterQuery( $contentClassID );
    if ( $classLimitationFilter !== null )
    {
        $filterQuery[] = $classLimitationFilter;
    }

    // Add section to query filter.
    if ( $sectionID )
    {
        $filterQuery[] = eZSolr::getMetaFieldName( 'section_id' ) . ':' . $sectionID;
    }

    $languageFilterQuery = $this->buildLanguageFilterQuery();
    if ( $languageFilterQuery )
    {
        $filterQuery[] = $languageFilterQuery;
    }

    $paramFilterQuery = $this->getParamFilterQuery( $params );
    if ( $paramFilterQuery )
    {
        $filterQuery[] = $paramFilterQuery;
    }

    // add raw filters
    if ( self::$FindINI->hasVariable( 'SearchFilters', 'RawFilterList' ) )
    {
        $rawFilters = self::$FindINI->variable( 'SearchFilters', 'RawFilterList' );
        if ( is_array( $rawFilters ) )
        {
            $filterQuery = array_merge( $filterQuery, $rawFilters );
        }
    }

    // Build and get facet query parameters.
    $facetQueryParamList = $this->buildFacetQueryParamList( $params );

    // return only text searcheable fields by passing NULL
    $fieldTypeExcludeList = $this->fieldTypeExludeList( NULL );

    // Create sort parameters based on the parameters.
    $sortParameter = $this->buildSortParameter( $params );

    $iniExtractionFields = self::$FindINI->variable( 'MoreLikeThis', 'ExtractionFields' );
    if ( $iniExtractionFields == 'general' )
    {
        // the collector field for all strings in an object
        $queryFields = array( 'ezf_df_text' );
    }
    else
    {
        // the array_unique below is necessary because attribute identifiers are not unique .. and we get as
        // much highlight snippets as there are duplicate attribute identifiers
        // these are also in the list of query fields (dismax, ezpublish) request handlers
        $queryFields = array_unique( $this->getClassAttributes( $contentClassID, false, $fieldTypeExcludeList ) );
    }

    // query type can vary for MLT q, or stream
    // if no valid match for the mlt query variant is obtained, it is treated as text
    $mltVariant = 'q';
    switch ( strtolower( $queryType ) )
    {
        case 'nid':
            $mltQuery = eZSolr::getMetaFieldName( 'node_id' ) . ':' . $query;
            $mltQuery .= ' AND ' . eZSolr::getMetaFieldName( 'installation_id' ) . ':' . $queryInstallationID;
            break;
        case 'oid':
            $mltQuery = eZSolr::getMetaFieldName( 'id' ) . ':' . $query;
            $mltQuery .= ' AND ' . eZSolr::getMetaFieldName( 'installation_id' ) . ':' . $queryInstallationID;
            break;
        case 'url':
            $mltVariant = 'stream.url';
            $mltQuery = $query;
            break;
        case 'text':
        default:
            $mltVariant = 'stream.body';
            $mltQuery = $query;
            break;
    }

    // fetch the mlt tuning parameters from ini settings; an empty or
    // otherwise falsy ini value falls back to the default listed here
    $mltDefaults = array( 'MinTermFreq' => 1,
                          'MinDocFreq' => 1,
                          'MinWordLength' => 3,
                          'MaxWordLength' => 20,
                          'MaxQueryTerms' => 5,
                          'BoostTerms' => 'true' );
    $mltSettings = array();
    foreach ( $mltDefaults as $iniName => $fallback )
    {
        $iniValue = self::$FindINI->variable( 'MoreLikeThis', $iniName );
        $mltSettings[$iniName] = $iniValue ? $iniValue : $fallback;
    }

    // @todo decide which of the hard-coded mlt parameters should become input parameters or ini settings
    $queryParams = array_merge(
        array(
            $mltVariant => $mltQuery,
            'start' => $offset,
            'rows' => $limit,
            'sort' => $sortParameter,
            'indent' => 'on',
            'version' => '2.2',
            'mlt.match.include' => 'false', // exclude the doc itself
            'mlt.mindf' => $mltSettings['MinDocFreq'],
            'mlt.mintf' => $mltSettings['MinTermFreq'],
            'mlt.maxwl' => $mltSettings['MaxWordLength'],
            'mlt.minwl' => $mltSettings['MinWordLength'], // minimum wordlength
            'mlt.maxqt' => $mltSettings['MaxQueryTerms'],
            'mlt.interestingTerms' => 'details', // useful for debug output & tuning
            'mlt.boost' => $mltSettings['BoostTerms'], // boost the highest ranking terms
            //'mlt.qf' => implode( ' ', $queryFields ),
            'mlt.fl' => implode( ' ', $queryFields ),
            'fl' =>
            eZSolr::getMetaFieldName( 'guid' ) . ' ' . eZSolr::getMetaFieldName( 'installation_id' ) . ' ' .
            eZSolr::getMetaFieldName( 'main_url_alias' ) . ' ' . eZSolr::getMetaFieldName( 'installation_url' ) . ' ' .
            eZSolr::getMetaFieldName( 'id' ) . ' ' . eZSolr::getMetaFieldName( 'main_node_id' ) . ' ' .
            eZSolr::getMetaFieldName( 'language_code' ) . ' ' . eZSolr::getMetaFieldName( 'name' ) .
            ' score ' . eZSolr::getMetaFieldName( 'published' ) . ' ' .
            eZSolr::getMetaFieldName( 'path_string' ) . ' ' . eZSolr::getMetaFieldName( 'is_invisible' ),
            'fq' => $filterQuery,
            'wt' => 'php' ),
        $facetQueryParamList );

    return $queryParams;
}
/**
 * Build the Solr sort parameter from the 'SortBy' element of the parameter list.
 *
 * Each 'SortBy' entry is either <field> => <order>, or an
 * array( <field>, <order> ) pair. Field names are translated as follows:
 *  - 'score', 'relevance'                         => 'score'
 *  - 'name'                                       => meta sort field 'sort_name'
 *  - 'published', 'modified', 'class_name',
 *    'class_identifier', 'section_id'             => meta sort field of the same name
 *  - 'author'                                     => meta sort field 'owner_name'
 *  - 'class_id'                                   => meta sort field 'contentclass_id'
 *  - 'path'                                       => meta sort field 'main_path_string'
 *  - anything else is resolved through eZSolr::getFieldName( ..., false, 'sort' )
 *
 * Entries whose field cannot be resolved are skipped (a notice is written to
 * the debug output). Orders other than 'asc' / 'desc' (case-insensitive) are
 * replaced by 'desc'.
 *
 * @todo specify dedicated sorting fields
 * @param array $parameterList Parameter list array. SortBy element contains sort
 *                             definition.
 *
 * @return string Comma separated sort description. 'score desc' when no
 *                'SortBy' is given or when none of its entries is usable.
 */
protected function buildSortParameter( $parameterList )
{
    $defaultSort = 'score desc';
    if ( empty( $parameterList['SortBy'] ) )
    {
        return $defaultSort;
    }

    $sortClauses = array();
    foreach ( $parameterList['SortBy'] as $field => $order )
    {
        // If array, set key and order from array values
        if ( is_array( $order ) )
        {
            $field = $order[0];
            $order = $order[1];
        }

        // Fixup field name
        switch ( $field )
        {
            case 'score':
            case 'relevance':
            {
                $field = 'score';
            } break;

            case 'name':
            {
                $field = eZSolr::getMetaFieldName( 'sort_name', 'sort' );
            } break;

            case 'published':
            case 'modified':
            case 'class_name':
            case 'class_identifier':
            case 'section_id':
            {
                $field = eZSolr::getMetaFieldName( $field, 'sort' );
            } break;

            case 'author':
            {
                $field = eZSolr::getMetaFieldName( 'owner_name', 'sort' );
            } break;

            case 'class_id':
            {
                $field = eZSolr::getMetaFieldName( 'contentclass_id', 'sort' );
            } break;

            case 'path':
            {
                // Assume sorting on main node path_string as it is not possible to sort on multivalued fields due to Solr limitation
                $field = eZSolr::getMetaFieldName( 'main_path_string', 'sort' );
            } break;

            default:
            {
                // Keep the requested name: $field is overwritten by the lookup result below.
                $requestedField = $field;
                $field = eZSolr::getFieldName( $requestedField, false, 'sort' );
                if ( !$field )
                {
                    eZDebug::writeNotice( 'Sort field does not exist in local installation, but may still be valid: ' .
                                          $requestedField,
                                          __METHOD__ );
                    // A bare 'continue' inside a switch acts like 'break' in PHP,
                    // which would append a clause with an empty field name.
                    // 'continue 2' really moves on to the next SortBy entry.
                    continue 2;
                }
            } break;
        }

        // Fixup order name.
        switch ( strtolower( $order ) )
        {
            case 'desc':
            case 'asc':
            {
                $order = strtolower( $order );
            } break;

            default:
            {
                eZDebug::writeDebug( 'Unrecognized sort order. Setting for order for default: "desc"',
                                     __METHOD__ );
                $order = 'desc';
            } break;
        }

        $sortClauses[] = $field . ' ' . $order;
    }

    // Every entry may have been skipped: fall back to the documented default
    // rather than returning an empty sort string.
    return empty( $sortClauses ) ? $defaultSort : implode( ',', $sortClauses );
}
/**
 * Build filter query from search filter parameter.
 *
 * The 'Filter' element of $parameterList is walked recursively. A leading
 * boolean operator element ( see getBooleanOperatorFromFilter() ) selects how
 * the clauses of one level are combined. Nested arrays become parenthesised
 * sub-queries; nested arrays producing no clause are left out. Entries with a
 * null value are ignored.
 *
 * Side effect: every value filtered on 'path' is appended to
 * $this->searchPluginInstance->postSearchProcessingData['subtree_array'].
 *
 * @deprecated api is way too limited now
 * @todo for eZ Find 2.0: rework this for recursive boolean combinations and a few more filter types, the possible combinations are almost infinite for pure Solr syntax
 * @param array $parameterList Parameter list array.
 *        The normal simple use is an array of type: array( '<field name>', <value> ).
 *        The value may also be an array containing values.
 *
 * Examples :
 * <code>
 * $parameters = array( 'article/title:hello' );
 * $parameters = array( 'article/title' => 'hello' );
 * $parameters = array( 'article/rating' => '[1 TO 10]' );
 * $parameters = array( 'article/rating' => '[1 TO 10]',
 *                      'article/body:hello' );
 * $parameters = array( 'or',
 *                      'article/rating' => '[1 TO 10]',
 *                      'article/body:hello' );
 * $parameters = array( 'or',
 *                      array( 'or',
 *                             'article/rating' => '[1 TO 10]',
 *                             'article/body:hello' ),
 *                      array( 'and',
 *                             'article/rating' => '[10 TO 20]',
 *                             'article/body:goodbye' ) );
 * </code>
 * @return string|null Filter Query. Null if no filter parameters are in
 *                     the $parameterList, or if none of them produces a clause.
 */
protected function getParamFilterQuery( $parameterList )
{
    if ( empty( $parameterList['Filter'] ) )
    {
        return null;
    }

    // The filter is passed by reference: a leading operator element is removed from it.
    $booleanOperator = $this->getBooleanOperatorFromFilter( $parameterList['Filter'] );
    $filterQueryList = array();

    foreach ( $parameterList['Filter'] as $baseName => $value )
    {
        if ( is_array( $value ) )
        {
            // Nested filter: recurse, and drop the group when it produces nothing,
            // otherwise an invalid empty '(  )' clause would be sent to Solr.
            $subQuery = $this->getParamFilterQuery( array( 'Filter' => $value ) );
            if ( $subQuery !== null and $subQuery !== '' )
            {
                $filterQueryList[] = '( ' . $subQuery . ' )';
            }
            continue;
        }

        // '<field>:<value>' shorthand stored under a numeric key.
        if ( is_numeric( $baseName ) and is_string( $value ) and strpos( $value, ':' ) !== false )
        {
            // split at the first colon, leave the rest intact
            list( $baseName, $value ) = explode( ':', $value, 2 );
        }

        if ( $value === null )
        {
            continue;
        }

        // Exception to the generic processing : when a subtree filter is applied, the search plugin needs to be notified
        // to be able to pick the right URL for objects, the main URL of which is located outside the subtree filter scope.
        // Strict comparison: with '==' an integer key 0 equals 'path' on PHP < 8.
        if ( $baseName === 'path' )
        {
            if ( isset( $this->searchPluginInstance->postSearchProcessingData['subtree_array'] ) )
            {
                $this->searchPluginInstance->postSearchProcessingData['subtree_array'][] = $value;
            }
            else
            {
                $this->searchPluginInstance->postSearchProcessingData['subtree_array'] = array( $value );
            }
        }

        // Get internal field name. Returns a class ID filter if applicable. Add it as an implicit filter if needed.
        $baseNameInfo = eZSolr::getFieldName( $baseName, true, 'filter' );
        if ( is_array( $baseNameInfo ) and isset( $baseNameInfo['contentClassId'] ) )
        {
            $filterQueryList[] = '( ' . eZSolr::getMetaFieldName( 'contentclass_id' ) . ':' . $baseNameInfo['contentClassId'] .
                                 ' AND ' . $baseNameInfo['fieldName'] . ':' . $value . ' )';
        }
        else
        {
            // Note that $value needs to be escaped if it unintentionally contains Solr reserved characters
            $filterQueryList[] = $baseNameInfo . ':' . $value;
        }
    }

    // E.g. a filter holding only an operator, or only null values.
    if ( empty( $filterQueryList ) )
    {
        return null;
    }

    return implode( " $booleanOperator ", $filterQueryList );
}
/**
 * Picks the boolean operator used to join the clauses of a filter
 * ( fq parameter in the final Solr raw request ).
 *
 * When the element at index 0 of the filter is a string listed in
 * self::$allowedBooleanOperators, it is removed from the filter and returned
 * in upper case. Otherwise the filter is left untouched and
 * self::DEFAULT_BOOLEAN_OPERATOR is returned.
 *
 * @param array &$filter Filter array processed in self::getParamFilterQuery.
 *                       Modified in place when it starts with an operator.
 * @return string The boolean operator to use.
 * @see ezfeZPSolrQueryBuilder::getParamFilterQuery
 */
protected function getBooleanOperatorFromFilter( &$filter )
{
    $startsWithOperator = isset( $filter[0] )
        && is_string( $filter[0] )
        && in_array( $filter[0], self::$allowedBooleanOperators );

    if ( !$startsWithOperator )
    {
        return self::DEFAULT_BOOLEAN_OPERATOR;
    }

    $operator = strtoupper( $filter[0] );
    // Drop the operator so that only real filter entries remain for the caller.
    unset( $filter[0] );

    return $operator;
}
/**
 * Wrap a value in double quotes when it needs them.
 *
 * A value is quoted only if it contains at least one space character and,
 * once trimmed, does not start with an opening parenthesis.
 *
 * @param string $value Value to inspect.
 *
 * @return string The value, surrounded by double quotes if needed.
 * @deprecated
 */
static function quoteIfNeeded( $value )
{
    // The second test is only evaluated for values containing a space.
    $needsQuotes = strpos( $value, ' ' ) !== false
        && strpos( trim( $value ), '(' ) !== 0;

    $wrapper = $needsQuotes ? '"' : '';

    return $wrapper . $value . $wrapper;
}
- /**
- * Build facet parameter list. This function extracts the facet parameter from
- * the ezfeZPSolrQueryBuilder::search( ...,$params parameter.
- *
- * @todo specify dedicated facet fields (may be mapped to sort fields)
- *
- * @param array Parameter list array
- *
- * @return array List of Facet query parameter. The facet parameter corresponds to
- * the parameters defined here : http://wiki.apache.org/solr/SimpleFacetParameters
- */
- protected function buildFacetQueryParamList( $parameterList )
- {
- $parameterList = array_change_key_case( $parameterList, CASE_LOWER );
- $queryParamList = array();
- if ( empty( $parameterList['facet'] ) )
- {
- return $queryParamList;
- }
- // Loop through facet definitions, and build facet query.
- foreach ( $parameterList['facet'] as $facetDefinition )
- {
- if ( empty( $facetDefinition['field'] …
Large files files are truncated, but you can click here to view the full file