diff options
| author | Sean Lee <seannerd@users.sourceforge.net> | 2006-02-20 04:56:08 +0000 |
|---|---|---|
| committer | Sean Lee <seannerd@users.sourceforge.net> | 2006-02-20 04:56:08 +0000 |
| commit | 37647b42396d5b336c181d3e484abc236ba730c6 (patch) | |
| tree | 256271d4b7963bb1320c4f4f1ad5a915a7d98c3f | |
| parent | 7e3b1627bc402f5c350bd59c0569c7d4d9ef1ec3 (diff) | |
| download | search-37647b42396d5b336c181d3e484abc236ba730c6.tar.gz search-37647b42396d5b336c181d3e484abc236ba730c6.tar.bz2 search-37647b42396d5b336c181d3e484abc236ba730c6.zip | |
Merge recent changes to HEAD
| -rw-r--r-- | cmd_line_reindex.php | 54 | ||||
| -rw-r--r-- | refresh_functions.php | 97 | ||||
| -rw-r--r-- | search_lib.php | 12 |
3 files changed, 74 insertions, 89 deletions
diff --git a/cmd_line_reindex.php b/cmd_line_reindex.php index bf7b9d0..800ddf2 100644 --- a/cmd_line_reindex.php +++ b/cmd_line_reindex.php @@ -48,35 +48,39 @@ require_once( SEARCH_PKG_PATH.'refresh_functions.php'); $whatToIndex = "pages"; $unindexedOnly = false; $silent = false; -if ($argc > 1) { - for ($i = 1; $i < $argc; $i++) { - $arg = strtolower($argv[$i]); - switch ($arg) { - case "silent" : - $silent = true; - break; - case "unindexedonly" : - $unindexedOnly = true; // only index content that hasn't been indexed yet - break; - default : - $whatToIndex = $arg; - break; +if (isset($argc)) { // we are running from the command line. + if ($argc > 1) { + for ($i = 1; $i < $argc; $i++) { + $arg = strtolower($argv[$i]); + switch ($arg) { + case "silent" : + $silent = true; + break; + case "unindexedonly" : + $unindexedOnly = true; // only index content that hasn't been indexed yet + break; + default : + $whatToIndex = $arg; + break; + } } + $time_start = microtime_float(); + if (!$silent) echo "\nBeginning Reindex of $whatToIndex ...\n"; + if (!$silent && $unindexedOnly) echo "Warning: unindexed only flag set. Will break MySQL 3.x because of sub-selects\n"; + $count = rebuild_index($whatToIndex, $unindexedOnly); + $time_end = microtime_float(); + $time = number_format($time_end - $time_start, 4); + if (!$silent) echo "Index rebuild complete.\n"; + if (!$silent) echo "Attempted to index $count pieces of content\n"; + if (!$silent) echo "(Note: Some content may not be indexable. This is normal)\n"; + if (!$silent) echo "Execution time: $time seconds\n"; + die(); } +} else { + // Don't allow this to be run from the web. + header("location: ../index.php" ); } -$time_start = microtime_float(); -if (!$silent) echo "\nBeginning Reindex of $whatToIndex ...\n"; -if (!$silent && $unindexedOnly) echo "Warning: unindexed only flag set. Will break MySQL 3.x because of sub-selects\n"; -$count = rebuild_index($whatToIndex, $unindexedOnly); -$time_end = microtime_float(); -$time = number_format($time_end - $time_start, 4); -if (!$silent) echo "Index rebuild complete.\n"; -if (!$silent) echo "Attempted to index $count pieces of content\n"; -if (!$silent) echo "(Note: Some content may not be indexable. This is normal)\n"; -if (!$silent) echo "Execution time: $time seconds\n"; -die(); - function microtime_float() { list($usec, $sec) = explode(" ", microtime()); return ((float)$usec + (float)$sec); diff --git a/refresh_functions.php b/refresh_functions.php index 6f5c4f8..bb36bf3 100644 --- a/refresh_functions.php +++ b/refresh_functions.php @@ -1,6 +1,6 @@ <?php /** - * $Header: /cvsroot/bitweaver/_bit_search/refresh_functions.php,v 1.19 2006/02/19 10:09:47 lsces Exp $ + * $Header: /cvsroot/bitweaver/_bit_search/refresh_functions.php,v 1.20 2006/02/20 04:56:08 seannerd Exp $ * * Copyright (c) 2004 bitweaver.org * Copyright (c) 2003 tikwiki.org @@ -8,7 +8,7 @@ * All Rights Reserved. See copyright.txt for details and a complete list of authors. * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details * - * $Id: refresh_functions.php,v 1.19 2006/02/19 10:09:47 lsces Exp $ + * $Id: refresh_functions.php,v 1.20 2006/02/20 04:56:08 seannerd Exp $ * @author Luis Argerich (lrargerich@yahoo.com) * @package search * @subpackage functions @@ -53,58 +53,30 @@ function random_refresh_index($pContentType = "") { } /* - * Index Refresh Function for Tiki Content + * Index Refresh Function for Liberty Content * This can be called directly to force a refresh for a particular piece of tiki content. * This is also called by the Random_Refresh_* indexing functions from tiki. * This currently works for wiki pages, blog posts and articles. */ -function refresh_index( $pvContentId = 0 ) { - /* +function refresh_index( $pContentObject = null ) { global $gBitSystem; - if (is_object($pvContentId)) { // InvokeService calls pass objects. - $contentId = $pvContentId->mContentId; - $contentGUID = $pvContentId->mContentTypeGuid; - } else { - $contentId = $pvContentId; - $contentGUID = ""; - } - require_once( LIBERTY_PKG_PATH.'LibertyComment.php' ); - require_once( WIKI_PKG_PATH.'BitPage.php' ); - require_once( BLOGS_PKG_PATH.'BitBlogPost.php' ); - require_once( ARTICLES_PKG_PATH.'BitArticle.php' ); - if ($contentId > 0) { - if (empty($contentGUID)) { - $sql = "SELECT `content_type_guid` FROM `" . BIT_DB_PREFIX . "liberty_content` WHERE `content_id` = " . $contentId; - $contentGUID = $gBitSystem->mDb->getOne($sql, array()); - } - $fields = ""; - $joins = ""; - switch ($contentGUID) { - case BITPAGE_CONTENT_TYPE_GUID : - $fields = ", t1.`description`"; - $joins = " INNER JOIN `" . BIT_DB_PREFIX . "tiki_pages` t1 ON tc.`content_id` = t1.`content_id`"; - break; - case BITARTICLE_CONTENT_TYPE_GUID : - $fields = ", t1.`description`, t1.`status_id`"; - $joins = " INNER JOIN `" . BIT_DB_PREFIX . "tiki_articles` t1 ON tc.`content_id` = t1.`content_id`"; - break; - default: + if (is_object($pContentObject)) { + if (!isset($pContentObject->mInfo["index_data"]) and method_exists($pContentObject, 'setIndexData')) { + $pContentObject->setIndexData() ; } - $query = "SELECT lc.`title`, lc.`data`, uu.`login`, uu.`real_name`" . $fields . " " . - "FROM `" . BIT_DB_PREFIX . "liberty_content` lc " . - "INNER JOIN `" . BIT_DB_PREFIX . "users_users` uu ON uu.`user_id` = lc.`user_id`" . - $joins . " WHERE lc.`content_id` = " . $contentId; - $result = $gBitSystem->mDb->query($query, array()); - $res = $result->fetchRow(); - if (($contentGUID <> BITARTICLE_CONTENT_TYPE_GUID) - or ($contentGUID == BITARTICLE_CONTENT_TYPE_GUID and $res["status_id"] == ARTICLE_STATUS_APPROVED)) { - $words = search_index($res["title"] . " " . $res["data"] . " " . $res["login"] . - " " . $res["real_name"] . (empty($pAuxTable) ? "" : " " . $res["description"])); - insert_index($words, $contentGUID, $contentId); + if (isset($pContentObject->mInfo["index_data"]) and isset($pContentObject->mContentId)) { + if (isset($pContentObject->mType["content_type_guid"])) { + $contentTypeGuid = $pContentObject->mType["content_type_guid"]; + } elseif (isset($pContentObject->mContentTypeGuid)) { + $contentTypeGuid = $pContentObject->mContentTypeGuid; + } + if (isset($contentTypeGuid)) { + $words = prepare_words($pContentObject->mInfo["index_data"]); + insert_index($words, $contentTypeGuid, $pContentObject->mContentId); + } } } - */ } // Legacy index handlers - blogs (blog headers) are not in liberty_content yet @@ -132,9 +104,8 @@ function refresh_index_blogs( $pBlogId = 0 ) { FROM `".BIT_DB_PREFIX."blogs` b INNER JOIN `".BIT_DB_PREFIX."users_users` uu ON uu.`user_id` = b.`user_id` WHERE `blog_id` = " . $pBlogId; - $result = $gBitSystem->mDb->query($query, array()); - $res = $result->fetchRow(); - $words = search_index($res["title"]." ".$res["user"]." ".$res["real_name"]." ".$res["description"]); + $res = $gBitSystem->mDb->getRow($query, array($pBlogId)); + $words = prepare_words($res["title"]." ".$res["user"]." ".$res["real_name"]." ".$res["description"]); insert_index($words, BITBLOG_CONTENT_TYPE_GUID, -1, $pBlogId); } } @@ -148,7 +119,7 @@ function refresh_index_oldest(){ } } -function search_index($data) { +function prepare_words($data) { $data = strip_tags($data); // split into words $sstrings = preg_split("/[\W]+/", $data, -1, PREG_SPLIT_NO_EMPTY); @@ -170,18 +141,19 @@ function delete_index ($pContentId) { $gBitSystem->mDb->query($sql, array($pContentId)); } } -function insert_index( &$words, $pContentId ) { + +function insert_index( &$words, $location, $pContentId ) { global $gBitSystem; if( !empty( $pContentId ) ) { delete_index($pContentId); $now = $gBitSystem->getUTCTime(); foreach ($words as $key=>$value) { if (strlen($key) >= $gBitSystem->getPreference( 'search_min_wordlength') ) { - // todo: stopwords + // todo: stopwords + common words. $query = "INSERT INTO `" . BIT_DB_PREFIX . "searchindex` (`content_id`,`searchword`,`i_count`,`last_update`) values (?,?,?,?)"; $gBitSystem->mDb->query($query, array($pContentId, $key, (int) $value, $now)); - } + } // What happened to location? } } } @@ -201,19 +173,20 @@ function delete_index_content_type($pContentType) { $array = array($pContentType); } $gBitSystem->mDb->query( $sql, $array ); - } function rebuild_index($pContentType, $pUnindexedOnly = false) { - global $gBitSystem; + global $gBitSystem, $gLibertySystem; + $arguments = array(); $whereClause = ""; ini_set("max_execution_time", "300"); if (!$pUnindexedOnly) { delete_index_content_type($pContentType); } - $query = "SELECT `content_id` FROM `" . BIT_DB_PREFIX . "liberty_content`"; + $query = "SELECT `content_id`, `content_type_guid` FROM `" . BIT_DB_PREFIX . "liberty_content`"; if ( $pContentType <> "pages") { - $whereClause = " WHERE `content_type_guid` = '" . $pContentType . "'"; + $whereClause = " WHERE `content_type_guid` = ?"; + $arguments[] = $pContentType; } if ($pUnindexedOnly) { if (empty($whereClause)) { @@ -223,13 +196,19 @@ function rebuild_index($pContentType, $pUnindexedOnly = false) { } $whereClause .= "`content_id` NOT IN (SELECT DISTINCT `content_id` FROM `" . BIT_DB_PREFIX . "searchindex`)" ; } - $result = $gBitSystem->mDb->query($query . $whereClause); + $orderBy = " ORDER BY `content_type_guid` "; + $result = $gBitSystem->mDb->query($query . $whereClause . $orderBy, $arguments); $count = 0; if( $result ) { $count = $result->RecordCount(); while ($res = $result->fetchRow()) { - $contentId = $res["content_id"]; - refresh_index($contentId); + if( isset( $gLibertySystem->mContentTypes[$res["content_type_guid"]] ) ) { + $type = $gLibertySystem->mContentTypes[$res["content_type_guid"]]; + require_once( constant( strtoupper( $type['handler_package'] ).'_PKG_PATH' ).$type['handler_file'] ); + $obj = new $type['handler_class']( NULL, $res["content_id"] ); + refresh_index($obj); + unset($obj); + } } } return $count; diff --git a/search_lib.php b/search_lib.php index dd45d0f..2abcd89 100644 --- a/search_lib.php +++ b/search_lib.php @@ -1,6 +1,6 @@ <?php /** - * $Header: /cvsroot/bitweaver/_bit_search/search_lib.php,v 1.18 2006/02/19 10:09:47 lsces Exp $ + * $Header: /cvsroot/bitweaver/_bit_search/search_lib.php,v 1.19 2006/02/20 04:56:08 seannerd Exp $ * * Copyright (c) 2004 bitweaver.org * Copyright (c) 2003 tikwiki.org @@ -8,7 +8,7 @@ * All Rights Reserved. See copyright.txt for details and a complete list of authors. * Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details * - * $Id: search_lib.php,v 1.18 2006/02/19 10:09:47 lsces Exp $ + * $Id: search_lib.php,v 1.19 2006/02/20 04:56:08 seannerd Exp $ * @author Luis Argerich (lrargerich@yahoo.com) * @package search */ @@ -17,6 +17,7 @@ * @package search * @subpackage SearchLib */ + class SearchLib extends BitBase { function SearchLib() { BitBase::BitBase(); @@ -156,18 +157,19 @@ class SearchLib extends BitBase { } function find_exact_generic($where, $words, $offset, $max_records) { - global $gPage, $gBitSystem, $gLibertySystem; + global $gPage, $gBitSystem, $gLibertySystem, $gBitDbType; $allowed = array(); $ret = array(); - foreach( $gLibertySystem->mContentTypes as $contentType ) { - if (($where == $contentType["content_type_guid"] or $where == "") + if (($where == $contentType["content_type_guid"] or $where == "") // pages ? and $this->has_permission($contentType["content_type_guid"])) { $allowed[] = $contentType["content_type_guid"]; } } if (count($allowed) > 0) { + // Putting in the below hack because mssql cannot select distinct on a text blob column. + // $dbFieldHack = $gBitDbType == 'mssql' ? " CAST(tc.`data` AS VARCHAR(250)) as `data` " : " tc.`data` "; $qPlaceHolders1 = implode(',', array_fill(0, count($words), '?')); $selectSql = ''; |
