summary | refs | log | tree | commit | diff
path: root/includes/classes
diff options
context:
space:
mode:
Diffstat (limited to 'includes/classes')
-rwxr-xr-x includes/classes/SearchLib.php 341
-rwxr-xr-x includes/classes/SearchStatsLib.php 57
2 files changed, 398 insertions, 0 deletions
diff --git a/includes/classes/SearchLib.php b/includes/classes/SearchLib.php
new file mode 100755
index 0000000..024d340
--- /dev/null
+++ b/includes/classes/SearchLib.php
@@ -0,0 +1,341 @@
+<?php
+/**
+ * $Header$
+ *
+ * @copyright (c) 2004 bitweaver.org
+ * Copyright (c) 2003 tikwiki.org
+ * Copyright (c) 2002-2003, Luis Argerich, Garland Foster, Eduardo Polidor, et. al.
+ * All Rights Reserved. See below for details and a complete list of authors.
+ * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See http://www.gnu.org/copyleft/lesser.html for details
+ *
+ * $Id$
+ * @author Luis Argerich (lrargerich@yahoo.com)
+ * @package search
+ */
+
+/**
+ * @package search
+ */
+
+namespace Bitweaver\Search;
+use Bitweaver\BitBase;
+use Bitweaver\Liberty\LibertyBase;
+
+class SearchLib extends BitBase {
+
+ /**
+ * Per-request cache of word lists, keyed by syllable.
+ * Filled by get_lru_wordlist() from the search_words table.
+ */
+ public $wordlist_cache;
+
+ /**
+ * Run the parent constructor and start with an empty word list cache.
+ */
+ public function __construct() {
+ parent::__construct();
+ $this->wordlist_cache = []; // for caching queries to the LRU-cache-list.
+ }
+
+ /**
+  * Record a search in the statistics table, one counter per search term.
+  *
+  * The input is lower-cased and split on whitespace. Each non-empty term
+  * either gets its `hits` counter incremented or is inserted with a count
+  * of 1 in the search_stats table.
+  *
+  * @param string $words the raw search phrase as entered by the user
+  */
+ public function register_search($words) {
+     // Terms are passed as bound parameters below, so they must not be
+     // escaped by hand: addslashes() would store literal backslashes.
+     $terms = preg_split('/\s+/', strtolower((string) $words), -1, PREG_SPLIT_NO_EMPTY);
+     foreach ($terms as $term) {
+         $term = trim($term);
+         if ($term === '') {
+             // Nothing left after trimming; do not count an empty term.
+             continue;
+         }
+         $known = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX .
+             "search_stats` WHERE `term`=?", [ $term ]);
+         $query = $known
+             ? "UPDATE `" . BIT_DB_PREFIX . "search_stats` SET `hits`= `hits` + 1 WHERE `term`=?"
+             : "INSERT INTO `" . BIT_DB_PREFIX . "search_stats` (`term`,`hits`) VALUES (?,1)";
+         $this->mDb->query($query, [ $term ]);
+     }
+ }
+
+ /**
+  * Entry point for a search.
+  *
+  * Replaces $pParamHash['words'] with the lower-cased list of words found
+  * in the search phrase, optionally extends that list with words that
+  * contain the given words as syllables, and delegates to
+  * find_exact_generic().
+  *
+  * @param array $pParamHash search parameters, modified in place. Keys read
+  *        here: 'words' (search phrase) and '$plUsePart' (enable the
+  *        syllable expansion).
+  * @return array list of matching content rows
+  */
+ public function find( &$pParamHash ) {
+     // A missing phrase is treated as an empty one instead of passing null to strtolower().
+     $phrase = strtolower((string) ($pParamHash['words'] ?? ''));
+     $pParamHash['words'] = preg_split("/[\W]+/", $phrase, -1, PREG_SPLIT_NO_EMPTY);
+
+     // NOTE(review): the option key literally starts with a '$' character.
+     // It is kept unchanged because callers outside this file may set it
+     // under exactly this name - confirm before renaming.
+     if ( !empty($pParamHash['$plUsePart']) ) {
+         $wordList = $this->get_wordlist_from_syllables( $pParamHash['words'] );
+         // The previous test wrapped the list in an array literal and was
+         // therefore always true; only merge when there is something to add.
+         if ( !empty($wordList) ) {
+             $pParamHash['words'] = array_merge( $pParamHash['words'], $wordList );
+         }
+     }
+     return $this->find_exact_generic( $pParamHash );
+ }
+
+ /**
+  * Expand syllables into the indexed words that contain them.
+  *
+  * For each syllable the search_syllable table is checked to see how old
+  * its entry is. If the entry is too old or does not exist, the word list
+  * stored in search_words is rebuilt via refresh_lru_wordlist(). The words
+  * are then read through get_lru_wordlist() and the syllable's `last_used`
+  * timestamp is updated so the least recently used entries can be purged.
+  *
+  * @param array $syllables syllables to expand
+  * @return array all words collected for the given syllables
+  */
+ public function get_wordlist_from_syllables($syllables) {
+     global $gBitSystem;
+     $maxAgeHours = $gBitSystem->getConfig( 'search_syll_age', SEARCH_PKG_NAME );
+     $syllableTable = "`" . BIT_DB_PREFIX . "search_syllable`";
+     $words = [];
+
+     foreach ($syllables as $syllable) {
+         $age = time() - $this->mDb->getOne(
+             "select `last_updated` from " . $syllableTable . " where `syllable`=?",
+             [ $syllable ]
+         );
+         // Rebuild when the entry is older than the configured number of hours.
+         $needsRefresh = !$age || $age > ($maxAgeHours * 3600);
+         if ($needsRefresh) {
+             $this->refresh_lru_wordlist($syllable);
+         }
+
+         $cached = $this->get_lru_wordlist($syllable);
+         if (is_array($cached)) {
+             $words = array_merge($words, $cached);
+         }
+
+         // Stamp the entry as used now (used to purge the oldest records).
+         $stamp = time();
+         $this->mDb->query(
+             "update " . $syllableTable . " set `last_used`=? where `syllable`=?",
+             [ (int) $stamp, $syllable ]
+         );
+     }
+
+     return $words;
+ }
+
+ /**
+  * Return the words stored in search_words for a syllable.
+  *
+  * Results are kept in $this->wordlist_cache so the table is queried at
+  * most once per syllable while a non-empty list is cached.
+  *
+  * @param string $syllable syllable to look up
+  * @return array list of words; empty when none are stored
+  */
+ public function get_lru_wordlist($syllable) {
+     // A cache hit must return the cached list; previously it fell through
+     // and returned an empty array.
+     if (isset($this->wordlist_cache[$syllable])) {
+         return $this->wordlist_cache[$syllable];
+     }
+
+     $query = "select `searchword` from `" . BIT_DB_PREFIX . "search_words` where `syllable`=?";
+     $result = $this->mDb->query($query, [ $syllable ]);
+     $words = [];
+     while ($res = $result->fetchRow()) {
+         $words[] = $res["searchword"];
+     }
+
+     // Only non-empty lists are cached, so a syllable that is filled later
+     // in the same request is looked up again.
+     if (!empty($words)) {
+         $this->wordlist_cache[$syllable] = $words;
+     }
+     return $words;
+ }
+
+ /**
+  * Rebuild the cached word list for one syllable.
+  *
+  * Deletes the syllable's rows from search_words and search_syllable,
+  * selects the indexed words that contain the syllable (most frequent
+  * first, limited to 'search_max_syllwords'), stores them again and stamps
+  * the syllable with the current time. At a random rate the syllable list
+  * is then trimmed to 'search_lru_length' entries by removing the least
+  * recently used syllables.
+  *
+  * @param string $syllable syllable to refresh
+  * @return array the words now stored for the syllable
+  */
+ public function refresh_lru_wordlist($syllable) {
+     global $gBitSystem;
+     // NOTE(review): SEARCH_PKG_NAME is passed as the second getConfig()
+     // argument - confirm whether that parameter is a package name or a
+     // default value; the three settings below are used as numbers.
+     $search_max_syllwords = $gBitSystem->getConfig( 'search_max_syllwords', SEARCH_PKG_NAME );
+     $search_lru_length = $gBitSystem->getConfig( 'search_lru_length', SEARCH_PKG_NAME );
+     $search_lru_purge_rate = $gBitSystem->getConfig( 'search_lru_purge_rate', SEARCH_PKG_NAME );
+     if (!isset($search_max_syllwords)) {
+         $search_max_syllwords = 100;
+     }
+
+     // Drop the stale word list and lru entry before rebuilding them.
+     $this->purge_syllable($syllable);
+
+     // search_max_syllwords limits how many different words containing the
+     // syllable are taken into account, sorted by number of occurrences.
+     $query = "SELECT `searchword`, SUM(`i_count`) AS `cnt` FROM `" . BIT_DB_PREFIX .
+         "search_index` WHERE `searchword` LIKE ? GROUP BY `searchword` ORDER BY 2 desc";
+     $result = $this->mDb->query($query, [ "%$syllable%" ], $search_max_syllwords);
+     $ret = [];
+     while ($res = $result->fetchRow()) {
+         $ret[] = $res["searchword"];
+     }
+
+     // Cache the outcome of this long running query.
+     foreach ($ret as $searchword) {
+         $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX .
+             "search_words` (`syllable`,`searchword`) VALUES (?,?)",
+             [ $syllable, $searchword ], -1, -1);
+     }
+
+     // Register the syllable in the lru list.
+     $now = time();
+     $this->mDb->query("INSERT INTO `" . BIT_DB_PREFIX .
+         "search_syllable`(`syllable`,`last_used`,`last_updated`) values (?,?,?)",
+         [ $syllable, (int) $now, (int) $now ]);
+
+     // At a random rate, check the length of the lru list and purge the
+     // entries that have not been used for the longest time. PHP seeds its
+     // random number generator automatically, so no manual srand() call is
+     // made here (reseeding would also disturb other users of rand()).
+     if (rand(1, $search_lru_purge_rate) == 1) {
+         $lrulength = $this->mDb->getOne("SELECT COUNT(*) FROM `" . BIT_DB_PREFIX .
+             "search_syllable`", []);
+         if ($lrulength > $search_lru_length) { // only purge if the lru list is too long
+             $diff = $lrulength - $search_lru_length;
+             $query = "select `syllable` from `" . BIT_DB_PREFIX . "search_syllable` ORDER BY `last_used` asc";
+             $result = $this->mDb->query($query, [], $diff);
+             $oldwords = [];
+             while ($res = $result->fetchRow()) {
+                 $oldwords[] = $res["syllable"];
+             }
+             foreach ($oldwords as $oldword) {
+                 $this->purge_syllable($oldword);
+             }
+         }
+     }
+     return $ret;
+ }
+
+ /**
+  * Delete one syllable from both the word list and the lru list tables.
+  *
+  * @param string $syllable syllable to remove
+  */
+ private function purge_syllable($syllable) {
+     $this->mDb->query("delete from `" . BIT_DB_PREFIX .
+         "search_words` where `syllable`=?", [ $syllable ], -1, -1);
+     $this->mDb->query("delete from `" . BIT_DB_PREFIX .
+         "search_syllable` where `syllable`=?", [ $syllable ], -1, -1);
+ }
+
+ /**
+ * OR search: return content whose index contains at least one of the
+ * words in $pParamHash['words'].
+ *
+ * Relevancy is the sum of `i_count` over all matching search_index rows of
+ * a content item. Rows are ordered by relevancy, then by hits. The total
+ * number of matches is stored in $pParamHash['cant'] and every returned
+ * row gets an 'href' pointing at index.php?content_id=...
+ *
+ * @param array $allowed content type guids, bound to the placeholders in $whereSql
+ * @param string $selectSql extra select columns appended to the statement
+ * @param string $joinSql extra joins appended to the statement
+ * @param string $whereSql extra conditions appended to the statement
+ * @param array $bindVars ignored: it is overwritten with words + allowed types
+ * @param array $pParamHash reads 'words', 'max_records', 'offset'; receives 'cant'
+ * @return array list of result rows
+ */
+ public function find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars,&$pParamHash) {
+ // Putting in the below hack because mssql cannot select distinct on a text blob column.
+ // One placeholder per search word, used in every IN (...) list below.
+ $qPlaceHolders1 = implode(',', array_fill(0, count($pParamHash['words']), '?'));
+ // Bind order for one IN list followed by the content type filter.
+ $bindVars = array_merge( $pParamHash['words'], $allowed );
+// $this->getServicesSql( 'content_list_sql_function', $selectSql, $joinSql, $whereSql, $bindVars );
+ $ret = [];
+ // Result query. Column 9 is relevancy and column 5 is hits (see ORDER BY).
+ $query = "SELECT
+ lc.`content_id`,
+ lc.`title`,
+ lc.`format_guid`,
+ lc.`content_type_guid`,
+ COALESCE(lch.`hits`,0) AS hits,
+ lc.`created`,
+ lc.`last_modified`,
+ lc.`data`,
+ COALESCE((
+ SELECT SUM(i_count)
+ FROM `" . BIT_DB_PREFIX . "search_index` si
+ WHERE si.`content_id`=lc.`content_id` AND si.`searchword` IN (" . $qPlaceHolders1 . ")
+ ),0) AS relevancy
+ $selectSql
+ FROM `" . BIT_DB_PREFIX . "liberty_content` lc
+ LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
+ $joinSql
+ WHERE (
+ SELECT SUM(i_count)
+ FROM `" . BIT_DB_PREFIX . "search_index` si
+ WHERE si.`content_id`=lc.`content_id`
+ AND si.`searchword` IN (" . $qPlaceHolders1 . ")
+ GROUP BY
+ si.`content_id`
+ )>0 $whereSql
+ ORDER BY 9 DESC, 5 DESC
+ ";
+ // Count query: same filter as above, without the relevancy column.
+ $querycant = "SELECT
+ COUNT(*)
+ FROM `" . BIT_DB_PREFIX . "liberty_content` lc
+ LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
+ $joinSql
+ WHERE (
+ SELECT SUM(i_count)
+ FROM `" . BIT_DB_PREFIX . "search_index` si
+ WHERE si.`content_id`=lc.`content_id`
+ AND si.`searchword` IN (" . $qPlaceHolders1 . ")
+ GROUP BY
+ si.`content_id`
+ )>0 $whereSql";
+ // The result query contains the word list twice (select and where), so the words are bound once more in front.
+ $result = $this->mDb->query( $query, array_merge( $pParamHash['words'] ,$bindVars), $pParamHash['max_records'], $pParamHash['offset'] );
+ $pParamHash['cant'] = $this->mDb->getOne( $querycant, $bindVars );
+ while ($res = $result->fetchRow()) {
+ $res['href'] = BIT_ROOT_URL . "index.php?content_id=" . $res['content_id'];
+ $ret[] = $res;
+ }
+ return $ret;
+ }
+
+ /**
+  * AND search: return only content whose index contains every word in
+  * $pParamHash['words'].
+  *
+  * One query is run per word. The per-word results (keyed by content id)
+  * are intersected with mergeResults(), which also adds up the relevancy.
+  * The merged list is counted, sorted by relevancy and hits, cut down to
+  * the requested page and finally given an 'href' per row.
+  *
+  * @param array $allowed content type guids, bound to the placeholders in $whereSql
+  * @param string $selectSql extra select columns appended to the statement
+  * @param string $joinSql extra joins appended to the statement
+  * @param string $whereSql extra conditions appended to the statement
+  * @param array $bindVars bind variables; slot 0 is overwritten with the current word
+  * @param array $pParamHash reads 'words', 'offset', 'max_records'; receives 'cant'
+  * @return array the requested page of result rows
+  */
+ public function find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, &$pParamHash) {
+     // Make a slot for the search word; the allowed content types follow it.
+     $bindVars[0] = null;
+     $bindVars = array_merge( $bindVars, $allowed );
+     // NOTE: liberty service SQL (content_list_sql_function) is not merged in here.
+
+     // The statement is identical for every word, so it is built once.
+     // Because of the leading hash_key column, relevancy is column 10 and
+     // hits is column 6.
+     $query = "SELECT lc.`content_id` AS hash_key,
+             lc.`content_id`,
+             lc.`title`,
+             lc.`format_guid`,
+             lc.`content_type_guid`,
+             COALESCE(lch.`hits`,0) AS hits,
+             lc.`created`,
+             lc.`last_modified`,
+             lc.`data`,
+             si.`i_count` AS relevancy
+             $selectSql
+         FROM `" . BIT_DB_PREFIX . "liberty_content` lc
+         LEFT OUTER JOIN `".BIT_DB_PREFIX."liberty_content_hits` lch ON (lc.`content_id` = lch.`content_id`)
+         $joinSql
+         INNER JOIN `".BIT_DB_PREFIX."search_index` si ON (si.`content_id`=lc.`content_id` AND si.`searchword` = ? )
+         WHERE `i_count` > 0 $whereSql
+         ORDER BY 10 DESC, 6 DESC
+         ";
+
+     $ret = [];
+     $first = true;
+     foreach ($pParamHash['words'] as $word) {
+         $bindVars[0] = $word;
+         $result = $this->mDb->getAssoc( $query, $bindVars );
+         if (!is_array($result)) {
+             // Treat a failed or empty lookup as "no content has this word".
+             $result = [];
+         }
+         if ($first) {
+             $ret = $result;
+             $first = false;
+         } else {
+             $this->mergeResults($ret, $result);
+         }
+     }
+
+     /* count it */
+     $pParamHash['cant'] = count($ret);
+
+     /* Sort it. The comparator is declared in this file's namespace, and a
+        callable given as a string must carry the fully qualified name. */
+     uasort($ret, __NAMESPACE__ . '\\search_relevance_sort');
+
+     /* Slice it. The third argument of array_slice() is a length, not an
+        end position. A non-positive max_records is treated as "no limit",
+        presumably matching the -1 convention used for queries in this
+        class - TODO confirm. */
+     $offset = (int) ($pParamHash['offset'] ?? 0);
+     $maxRecords = (int) ($pParamHash['max_records'] ?? 0);
+     $ret = array_slice($ret, max(0, $offset), $maxRecords > 0 ? $maxRecords : null);
+
+     /* Set the hrefs. */
+     foreach ($ret as $idx => $data) {
+         $ret[$idx]['href'] = BIT_ROOT_URL . "index.php?content_id=" . $data['content_id'];
+     }
+
+     return $ret;
+ }
+
+ /**
+  * Work out which content types may be searched and run the search.
+  *
+  * A content type is searched when it matches the requested
+  * 'content_type_guid' (or none was requested), has_permission() allows
+  * it, and - if 'search_restrict_types' is configured - its
+  * 'search_pkg_<guid>' setting is enabled. The search itself is done by
+  * find_with_and() when $pParamHash['useAnd'] is set, otherwise by
+  * find_with_or().
+  *
+  * @param array $pParamHash reads 'content_type_guid', 'words', 'useAnd';
+  *        'cant' is set to 0 here when there is nothing to search
+  * @return array list of result rows, empty when nothing can be searched
+  */
+ public function find_exact_generic( &$pParamHash ) {
+     global $gBitSystem, $gLibertySystem;
+
+     // Missing keys are treated as "any content type" / "no words" rather
+     // than raising undefined-index warnings or a count() error.
+     $requestedType = $pParamHash['content_type_guid'] ?? '';
+     $words = $pParamHash['words'] ?? [];
+     // The restriction switch does not depend on the content type; read it once.
+     $restrictTypes = $gBitSystem->getConfig('search_restrict_types');
+
+     $allowed = [];
+     foreach ( $gLibertySystem->mContentTypes as $contentType ) {
+         $guid = $contentType["content_type_guid"];
+         if ( $requestedType != "" && $requestedType != $guid ) {
+             continue; // another content type was asked for
+         }
+         if ( !$this->has_permission($guid) ) {
+             continue;
+         }
+         if ( $restrictTypes && !$gBitSystem->getConfig('search_pkg_'.$guid) ) {
+             continue; // type is excluded from searching by configuration
+         }
+         $allowed[] = $guid;
+     }
+
+     if ( count($allowed) === 0 || !is_array($words) || count($words) === 0 ) {
+         $pParamHash['cant'] = 0;
+         return [];
+     }
+
+     $selectSql = '';
+     $joinSql = '';
+     $whereSql = " AND lc.`content_type_guid` IN (" . implode(',', array_fill(0, count($allowed), '?')) . ") ";
+     $bindVars = [];
+
+     return !empty($pParamHash['useAnd'])
+         ? $this->find_with_and($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash)
+         : $this->find_with_or($allowed, $selectSql, $joinSql, $whereSql, $bindVars, $pParamHash);
+ }
+
+ /**
+  * Intersect two result sets that are keyed by content id.
+  *
+  * Entries of $ret that are missing from $result are dropped; for the
+  * remaining ones the relevancy found in $result is added to the one
+  * already in $ret.
+  *
+  * @param array $ret accumulated results, modified in place
+  * @param array $result results for one more search word
+  */
+ public function mergeResults(&$ret, $result) {
+     foreach (array_keys($ret) as $contentId) {
+         if (isset($result[$contentId])) {
+             // Present in both sets: accumulate the relevance.
+             $ret[$contentId]['relevancy'] += $result[$contentId]['relevancy'];
+             continue;
+         }
+         // No overlap: this content does not match every word.
+         unset($ret[$contentId]);
+     }
+ }
+
+ /**
+  * Basic view-permission check for a content type.
+  *
+  * A generic liberty object is created for the content type and asked via
+  * hasViewPermission(false). Full access verification is not possible with
+  * a generic object, so this is only a basic check.
+  *
+  * @param string|null $pContentType content type guid
+  * @return mixed result of hasViewPermission(), or false when no content
+  *         type was given or no object could be created
+  */
+ public static function has_permission($pContentType = null) {
+     global $gBitUser, $gLibertySystem;
+
+     if ( empty( $pContentType ) ) {
+         return false;
+     }
+
+     $object = LibertyBase::getLibertyObject(1, $pContentType, false);
+     return empty( $object ) ? false : $object->hasViewPermission(false);
+ }
+
+} # class SearchLib
+
+// Guard against redeclaration. function_exists() is the check for functions
+// (defined() only looks at constants), and inside a namespace the function
+// name has to be fully qualified.
+if (!function_exists(__NAMESPACE__ . '\search_relevance_sort')) {
+    /**
+     * Comparator for search results: highest relevancy first, ties broken
+     * by the higher hit count.
+     *
+     * @param array $a result row with 'relevancy' and 'hits'
+     * @param array $b result row with 'relevancy' and 'hits'
+     * @return int negative, zero or positive as required by the sort functions
+     */
+    function search_relevance_sort($a, $b) {
+        return ($b['relevancy'] <=> $a['relevancy']) ?: ($b['hits'] <=> $a['hits']);
+    }
+}
\ No newline at end of file
diff --git a/includes/classes/SearchStatsLib.php b/includes/classes/SearchStatsLib.php
new file mode 100755
index 0000000..de5bd9e
--- /dev/null
+++ b/includes/classes/SearchStatsLib.php
@@ -0,0 +1,57 @@
+<?php
+/**
+ * $Header$
+ *
+ * @copyright (c) 2004 bitweaver.org
+ * Copyright (c) 2003 tikwiki.org
+ * Copyright (c) 2002-2003, Luis Argerich, Garland Foster, Eduardo Polidor, et. al.
+ * All Rights Reserved. See below for details and a complete list of authors.
+ * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See http://www.gnu.org/copyleft/lesser.html for details
+ *
+ * $Id$
+ * @author Luis Argerich (lrargerich@yahoo.com)
+ * @package search
+ */
+
+namespace Bitweaver\Search;
+use Bitweaver\BitBase;
+
+/**
+ * Maintenance and listing of the recorded search terms (search_stats table).
+ *
+ * @package search
+ * @subpackage SearchStatsLib
+ */
+class SearchStatsLib extends BitBase {
+
+    /**
+     * Delete all recorded search statistics.
+     */
+    public function clear_search_stats() {
+        // The table name must be closed with a backtick; the statement was
+        // previously left with an unbalanced quote.
+        $query = "DELETE FROM `".BIT_DB_PREFIX."search_stats`";
+        $this->mDb->query($query,[]);
+    }
+
+    /**
+     * List recorded search terms, one page at a time.
+     *
+     * @param int $offset index of the first row to return
+     * @param int $max_records maximum number of rows to return
+     * @param string $sort_mode sort mode, translated by convertSortmode()
+     * @param string $find optional case-insensitive substring filter on the term
+     * @return array 'data' holds the rows of the page, 'cant' the total
+     *         number of rows matching the filter
+     */
+    public function list_search_stats($offset, $max_records, $sort_mode, $find) {
+        $mid = "";
+        $bindvars = [];
+        if ($find) {
+            $mid = " WHERE (UPPER(`term`) LIKE ?)";
+            $bindvars = [ "%".strtoupper( $find )."%" ];
+        }
+
+        $query = "SELECT * FROM `".BIT_DB_PREFIX."search_stats` $mid ORDER BY ".$this->mDb->convertSortmode($sort_mode);
+        $query_cant = "SELECT COUNT(*) FROM `".BIT_DB_PREFIX."search_stats` $mid";
+        $result = $this->mDb->query($query,$bindvars,$max_records,$offset);
+        $cant = $this->mDb->getOne($query_cant,$bindvars);
+
+        $ret = [];
+        while ($res = $result->fetchRow()) {
+            $ret[] = $res;
+        }
+
+        return [
+            "data" => $ret,
+            "cant" => $cant,
+        ];
+    }
+}
+
+// Instance created as soon as this file is loaded and stored in $searchstatslib.
+$searchstatslib = new SearchStatsLib();