wordlist_cache = []; // for caching queries to the LRU-cache-list. } public function register_search($words) { $words = strtolower($words); $words = addslashes($words); $words = preg_split("/\s/", $words); foreach ($words as $word) { $word = trim($word); $cant = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX . "search_stats` WHERE `term`=?", [ $word ], ); $query = $cant ? "UPDATE `" . BIT_DB_PREFIX . "search_stats` SET `hits`= `hits` + 1 WHERE `term`=?" : "INSERT INTO `" . BIT_DB_PREFIX . "search_stats` (`term`,`hits`) VALUES (?,1)"; $result = $this->mDb->query($query, [ $word ]); } } public function find( &$pParamHash ) { // $where, $words, $offset, $max_records, $plUsePart = false) { $pParamHash['words'] = preg_split("/[\W]+/", strtolower($pParamHash['words']), -1, PREG_SPLIT_NO_EMPTY); $wordList = $this->get_wordlist_from_syllables( $pParamHash['words'] ); if( !empty( $wordList ) ) { $pParamHash['words'] = array_unique( array_merge( $pParamHash['words'], $wordList ) ); } $res = $this->find_exact_generic( $pParamHash ); return $res; } /* * This function checks the search_syllable table to see how old the "syllable" is * If the syllable is to old or doesn't exist, it refreshes the syllable/word list stored in search_words * Then, it get a list of words from the search_words table and returns an array of them */ public function get_wordlist_from_syllables($syllables) { global $gBitSystem; $search_syll_age = $gBitSystem->getConfig( 'search_syll_age', SEARCH_PKG_NAME ); $ret = []; foreach($syllables as $syllable) { $bindvars = [ $syllable ]; $age = time() - (int)$this->mDb->getOne( "select `last_updated` from `" . BIT_DB_PREFIX . "search_syllable` where `syllable`=?", $bindvars, ); if(!$age || $age > ($search_syll_age * 3600)) {// older than search_syll_age hours $a = $this->refresh_lru_wordlist($syllable); } $lruList = $this->get_lru_wordlist($syllable); if (is_array($lruList)) { $ret = array_merge($ret, $lruList); } // update lru last used value (Used to purge oldest last used records) $now = time(); $this->mDb->query("update `" . BIT_DB_PREFIX . "search_syllable` set `last_used`=? where `syllable`=?", [ (int) $now, $syllable ], ); } return $ret; } public function get_lru_wordlist($syllable) { $ret = []; if(!isset($this->wordlist_cache[$syllable])) { $query = "select `searchword` from `" . BIT_DB_PREFIX . "search_words` where `syllable`=?"; $result = $this->mDb->query($query, [ $syllable ]); if ($result->RecordCount() > 0) { while ($res = $result->fetchRow()) { $this->wordlist_cache[$syllable][]=$res["searchword"]; } $ret = $this->wordlist_cache[$syllable]; } } return $ret; } public function refresh_lru_wordlist($syllable) { global $gBitSystem; $search_max_syllwords = $gBitSystem->getConfig( 'search_max_syllwords', SEARCH_PKG_NAME );; $search_lru_length = $gBitSystem->getConfig( 'search_lru_length', SEARCH_PKG_NAME );; $search_lru_purge_rate = $gBitSystem->getConfig( 'search_lru_purge_rate', SEARCH_PKG_NAME ); $ret = []; // delete from wordlist and lru list $this->mDb->query("delete from `".BIT_DB_PREFIX."search_words` where `syllable`=?", [ $syllable ],-1,-1); $this->mDb->query("delete from `".BIT_DB_PREFIX."search_syllable` where `syllable`=?", [ $syllable ],-1,-1); if (!isset($search_max_syllwords)) { $search_max_syllwords = 100; } $query = "SELECT `searchword`, SUM(`i_count`) AS `cnt` FROM `" . BIT_DB_PREFIX . "search_index` WHERE `searchword` LIKE ? GROUP BY `searchword` ORDER BY 2 desc"; $result = $this->mDb->query($query, [ "%$syllable%" ], $search_max_syllwords); // search_max_syllwords: how many different search_words that contain the syllable are taken into account?. Sortet by number of occurences. while ($res = $result->fetchRow()) { $ret[] = $res["searchword"]; } // cache this long running query foreach($ret as $searchword) { $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX . "search_words` (`syllable`,`searchword`) VALUES (?,?)", [ $syllable, $searchword ], -1, -1, ); } // set lru list parameters $now = time(); $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX . "search_syllable`(`syllable`,`last_used`,`last_updated`) values (?,?,?)", [ $syllable, (int) $now, (int) $now ], ); // at random rate: check length of lru list and purge these that // have not been used for long time. This is what a lru list // basically does list($usec, $sec) = explode(" ", microtime()); srand (ceil($sec + 100 * $usec)); if(rand(1, $search_lru_purge_rate) == 1) { $lrulength = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX . "search_syllable`", [], ); if ($lrulength > $search_lru_length) { // only purge if lru list is too long. //purge oldest $oldwords = []; $diff = $lrulength - $search_lru_length; $query = "select `syllable` from `".BIT_DB_PREFIX."search_syllable` ORDER BY `last_used` asc"; $result = $this->mDb->query($query, [], $diff); while ($res = $result->fetchRow()) { $oldwords[]=$res["syllable"]; } foreach($oldwords as $oldword) { $this->mDb->query("delete from `" . BIT_DB_PREFIX . "search_words` where `syllable`=?", [ $oldword ], -1, -1, ); $this->mDb->query("delete from `" . BIT_DB_PREFIX . "search_syllable` where `syllable`=?", [ $oldword ], -1, -1, ); } } } return $ret; } public function find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars,&$pParamHash) { // Putting in the below hack because mssql cannot select distinct on a text blob column. $qPlaceHolders1 = implode(',', array_fill(0, count($pParamHash['words']), '?')); $bindVars = array_merge( $pParamHash['words'], $allowed ); LibertyContent::getServicesSql( 'search_extra_sql_function', $selectSql, $joinSql, $whereSql, $bindVars ); $ret = []; $query = "SELECT lc.`content_id`, lc.`title`, lc.`format_guid`, lc.`content_type_guid`, COALESCE(lch.`hits`,0) AS hits, lc.`created`, lc.`last_modified`, lc.`data`, COALESCE(( SELECT SUM(i_count) FROM `" . BIT_DB_PREFIX . "search_index` si WHERE si.`content_id`=lc.`content_id` AND si.`searchword` IN (" . $qPlaceHolders1 . ") ),0) AS relevancy $selectSql FROM `" . BIT_DB_PREFIX . "liberty_content` lc LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`) $joinSql WHERE ( SELECT SUM(i_count) FROM `" . BIT_DB_PREFIX . "search_index` si WHERE si.`content_id`=lc.`content_id` AND si.`searchword` IN (" . $qPlaceHolders1 . ") GROUP BY si.`content_id` )>0 $whereSql ORDER BY 9 DESC, 5 DESC "; $querycant = "SELECT COUNT(*) FROM `" . BIT_DB_PREFIX . "liberty_content` lc LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`) $joinSql WHERE ( SELECT SUM(i_count) FROM `" . BIT_DB_PREFIX . "search_index` si WHERE si.`content_id`=lc.`content_id` AND si.`searchword` IN (" . $qPlaceHolders1 . ") GROUP BY si.`content_id` )>0 $whereSql"; $result = $this->mDb->query( $query, array_merge( $pParamHash['words'] ,$bindVars), $pParamHash['max_records'], $pParamHash['offset'] ); $pParamHash['cant'] = $this->mDb->getOne( $querycant, $bindVars ); while ($res = $result->fetchRow()) { $res['href'] = BIT_ROOT_URL . "index.php?content_id=" . $res['content_id']; $ret[] = $res; } return $ret; } public function find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, &$pParamHash) { // Make a slot for the search word. $bindVars[0] = null; $bindVars = array_merge( $bindVars, $allowed ); LibertyContent::getServicesSql( 'search_extra_sql_function', $selectSql, $joinSql, $whereSql, $bindVars ); $ret = []; $first = true; foreach($pParamHash['words'] as $word) { $query = "SELECT lc.`content_id` AS hash_key, lc.`content_id`, lc.`title`, lc.`format_guid`, lc.`content_type_guid`, COALESCE(lch.`hits`,0) AS hits, lc.`created`, lc.`last_modified`, lc.`data`, si.`i_count` AS relevancy $selectSql FROM `" . BIT_DB_PREFIX . "liberty_content` lc LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`) $joinSql INNER JOIN `".BIT_DB_PREFIX."search_index` si ON (si.`content_id`=lc.`content_id` AND si.`searchword` = ? ) WHERE si.`i_count` > 0 $whereSql ORDER BY relevancy DESC, hits DESC "; $bindVars[0] = $word; $result = $this->mDb->getAssoc( $query, $bindVars ); if ($first) { $ret = $result; $first = false; } else { $this->mergeResults($ret, $result); } } /* count it */ $pParamHash['cant'] = count($ret); /* Sort it */ uasort($ret, function($a, $b) { $rel = $b['relevancy'] - $a['relevancy']; return $rel == 0 ? $b['hits'] - $a['hits'] : $rel; }); /* slice it */ $ret = array_slice($ret, $pParamHash['offset'], $pParamHash['offset'] + $pParamHash['max_records']); /* Set the hrefs. */ foreach ($ret as $content_id => $data) { $ret[$content_id]['href'] = BIT_ROOT_URL . "index.php?content_id=" . $data['content_id']; } return $ret; } public function find_exact_generic( &$pParamHash ) { global $gPage, $gBitSystem, $gLibertySystem, $gBitDbType; $allowed = []; $ret = []; foreach( $gLibertySystem->mContentTypes as $contentType ) { if (( $pParamHash['content_type_guid'] == $contentType["content_type_guid"] or $pParamHash['content_type_guid'] == "" ) // pages ? and $this->has_permission($contentType["content_type_guid"]) and ( ! $gBitSystem->getConfig('search_restrict_types') || $gBitSystem->getConfig('search_pkg_'.$contentType["content_type_guid"]) ) ) { $allowed[] = $contentType["content_type_guid"]; } } if (count($allowed) > 0 && count($pParamHash['words']) > 0) { $selectSql = ''; $joinSql = ''; $whereSql = " AND lc.`content_type_guid` IN (" . implode(',', array_fill(0, count($allowed), '?')) . ") "; $bindVars = []; $ret = isset($pParamHash['useAnd']) && $pParamHash['useAnd'] ? $this->find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash) : $this->find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash); } else { $pParamHash['cant'] = 0; $ret = []; } return $ret; } public function mergeResults(&$ret, $result) { // Remove those that don't overlap or update relevance foreach ($ret as $content_id => $data) { if (!isset($result[$content_id])) { unset($ret[$content_id]); } else { $ret[$content_id]['relevancy'] += $result[$content_id]['relevancy']; } } } public static function has_permission($pContentType = null) { global $gBitUser, $gLibertySystem; if ( ! empty( $pContentType ) ) { $object = LibertyBase::getLibertyObject(1, $pContentType, false); if ( ! empty( $object ) ) { // Note that we can't do verify access here because // we are using a generic object but we can at least get a // basic permission check here. return $object->hasViewPermission(false); } } return false; } } # class SearchLib