diff options
Diffstat (limited to 'includes/htmlparser/htmlgrammarparser.inc')
| -rwxr-xr-x | includes/htmlparser/htmlgrammarparser.inc | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/includes/htmlparser/htmlgrammarparser.inc b/includes/htmlparser/htmlgrammarparser.inc new file mode 100755 index 0000000..259fcce --- /dev/null +++ b/includes/htmlparser/htmlgrammarparser.inc @@ -0,0 +1,478 @@ +<?php +if (!defined("_ECHOSERVER_HTML_GRAMMARPARSER")) { +define("_ECHOSERVER_HTML_GRAMMARPARSER",1); + +class HtmlGrammarParser { + var $line, + $column, + $pos, + $length, + $data, + $pg, + $pgpos, + $parpos, + $incomment, + $name, + $allreadyparsed, + $errors, + $errpos, + $quotstate, + $firstprev, + $secondprev, + $firststate, + $secondstate, + $iseof, + $mode, + $tagname, + $parname; +/********************************************************************************** + * Class constructor + **********************************************************************************/ + function HtmlGrammarParser($data) { + $this->firstprev=array("state"=>0,"word"=>""); + $this->secondprev=array("state"=>0,"word"=>""); + $this->line=0; + $this->pos=0; + $this->errors=array(); + $this->errpos=-1; + $this->incomment=-1; + $this->allreadyparsed=0; + $this->pg=array(); + $this->pgpos=-1; + $this->quotstate=-1; + $this->iseof=false; + $this->firststate=0; + $this->secondstate=0; + $this->mode=1; + + if(gettype($this->data)=="array") { + $this->pg=&$data; + $this->allreadyparsed=1; + return; + } + clearstatcache(); + $this->name=$data; + if (!file_exists($this->name)) { + $this->SetError(1,"File $this->name not exists.",0,0,"Error"); + return; + } + if (!$fp=fopen($this->name,"r")) { + $this->SetError(1,"Can't open file $this->name.",0,0,"Error"); + return; + } + flock($fp,1); + $this->data=fread($fp,filesize($this->name)); + flock($fp,3); + fclose($fp); + $this->length=strlen($this->data); + } + +/******************************************************************************************** + * Store parser's errors and warnings + ********************************************************************************************/ + function SetError($e,$str,$line=0,$column=0,$errtype="Warning") { + $this->errors[++$this->errpos]["type"]=$errtype; + $this->errors[$this->errpos]["code"]=$e; + $this->err=$e; + $this->errstr="<b>$errtype:</b> $e, $str"; + if ($line) { + if (strlen($this->name)) + $this->errstr.="object <font color=\"red\">$this->name</font>"; + $this->errstr.=" Line <b>$line</b>, Column <b>$column</b>"; + } + $this->errors[$this->errpos]["str"]=$this->errstr."<br>\r\n"; + } + +/******************************************************************************************** + * Print parser's errors and warnings + ********************************************************************************************/ + function PrintErrors() { + for ($i=0;$i<=$this->errpos;$i++) + print $this->errors[$i]["str"]; + } + +/******************************************************************************************** + * Get word from data + ********************************************************************************************/ + function GetWord($word) { + $word=""; + $found=0; + $iter=0; + if ($this->pos>$this->length) + return false; + while (!$found) { + if ($this->pos>$this->length) + return false; + if ($this->pos==$this->length) { + $this->pos++; + return $word; + } + switch($this->data[$this->pos]) { + case "*": + if ($this->quotstate==1) { + $word.=$this->data[$this->pos++]; + $this->column++; + break; + } + $this->column++; + $this->pos++; + if ($word[0]=="/") + $found=1; + $word.=$this->data[$this->pos-1]; + break; + case "/": + if ($this->quotstate==1) { + $word.=$this->data[$this->pos++]; + $this->column++; + break; + } + $this->column++; + $this->pos++; + if ($word[0]=="*") + $found=1; + $word.=$this->data[$this->pos-1]; + break; + case " ": + case "\r": + case "\t": + if ($this->quotstate==1) { + $word.=$this->data[$this->pos++]; + $this->column++; + break; + } + $this->column++; + $this->pos++; + if (strlen($word)) + $found=1; + break; + case "\n": + if ($this->quotstate==1) { + $word.=$this->data[$this->pos++]; + $this->column++; + break; + } + $this->column=0; + $this->line++; + $this->pos++; + if (strlen($word)) + $found=1; + break; + case ">": + case "<": + case "=": + if ($this->quotstate==1) { + $word.=$this->data[$this->pos++]; + $this->column++; + } else { + if (!strlen($word)) { + $word=$this->data[$this->pos++]; + $this->column++; + } + $found=1; + } + break; + case "\"": + if ($this->pos) { + if ($this->data[$this->pos-1]=="\\") { + $word.=$this->data[$this->pos++]; + $this->column++; + } else { + if (!strlen($word)) { + $this->quotstate*=-1; + $word=$this->data[$this->pos++]; + $this->column++; + } + $found=1; + } + } else { + $word=$this->data[$this->pos++]; + $this->column++; + $found=1; + } + break; + default: + $this->column++; + $word.=$this->data[$this->pos++]; + } + } + return true; + } + +/******************************************************************************************** + * Parse grammar first step + ******************************************************************************************** +Parse +< [] [] > + +in/state 0 1 2 3 +< 1 -1 -1 1 +[ -1 2 -1 -1 +] -1 -1 1 -1 +> -1 3 -1 -1 +word -1 1 2 -1 +EOF -1 -1 -1 -2 + +-2 end parse + 0 begin parse, waiting '<' + 1 got '<' need to parse parameters, or wait '>' or wait '[' + 2 got '[' or ']' need to parse parameters + 3 got '>', waiting eof or '<' + + ********************************************************************************************/ + function ParseFirst($word) { + if ($this->iseof) { + $this->firstprev["state"]=0; + $this->firstprev["word"]=""; + return true; + } + $automat=array( + "0"=>array( 1, -1, -1, 1), + "1"=>array(-1, 2, -1, -1), + "2"=>array(-1, -1, 1, -1), + "3"=>array(-1, 3, -1, -1), + "4"=>array(-1, 1, 2, -1), + "5"=>array(-1, -1, -1, -2) + ); + switch($word) { + case "<": + $instate=0; + $this->pgpos++; + $this->parpos=-1; + break; + case "[": + $this->parpos++; + $instate=1; + break; + case "]": + $instate=2; + break; + case ">": + $instate=3; + break; + default: + $instate=4; + break; + } + $this->firststate=$automat[$instate][$this->firststate]; + if ($this->firststate==-1) return false; + switch ($this->firststate) { + case 1: + $this->mode=1; + if ($this->firstprev["state"]==1) + if (!$this->ParseSecond($word)) return false; + break; + case 2: + switch($this->firstprev["state"]) { + case 1: + $this->mode=2; + break; + case 3: + case 2: + if ($this->firstprev["state"]==2) + $this->mode=2; + else + $this->mode=1; + break; + } + if ($this->firstprev["state"]==2) + if (!$this->ParseSecond($word)) return false; + break; + case 3: + if (isset($this->pg[$this->pgpos]["tag"]["nohavesametag"])) + $this->pg[$this->pgpos]["tag"]["closeon"]["in"][]=$this->tagname; + break; + } + $this->firstprev["state"]=$this->firststate; + $this->firstprev["word"]=$word; + return true; + } + +/******************************************************************************************** + * Parse grammar second step + ******************************************************************************************** + +Parse +par1="value" par2=value + +in/state 0 1 2 3 4 += -1 2 -1 3 -1 +" -1 -1 3 4 -1 +word 1 -1 4 3 1 +EOF -1 -1 -1 -1 -1 + +-3 end parse by '>' +-2 end parse by ']' + 0 begin parse waiting parname + 1 got parname, waiting '=' or new parname + 2 got '=' waiting any word as value or first '"' + 3 collect words to next '"' + 4 got parvalue, waiting new parname + ********************************************************************************************/ + function ParseSecond($word) { + if ($this->iseof) return false; + $automat=array( + "0"=>array(-1, 2, -1, 3, -1), + "1"=>array(-1, -1, 3, 4, -1), + "2"=>array( 1, 1, 4, 3, 1), + "3"=>array(-1, -1, -1, -1, -1) + ); + switch($word) { + case "=": + $instate=0; + break; + case "\"": + $instate=1; + break; + default: + $instate=2; + break; + } + $this->secondstate=$automat[$instate][$this->secondstate]; + if ($this->secondstate==-1) return false; + switch ($this->secondstate) { + case 1: + $this->parname=$word; + if (!ereg("[a-zA-Z_-]+([0-9]+)?",$word)) { + $this->SetError(1,"Fatal error.",$this->line,$this->column,"Error"); + return false; + } + switch($this->mode) { + case 1: + $this->pg[$this->pgpos]["tag"][$this->parname]=""; + break; + case 2: + $this->pg[$this->pgpos]["pars"][$this->parpos][$this->parname]=""; + break; + } + break; + case 4: + switch($this->mode) { + case 1: + if ($this->secondprev["state"]==3) + $this->pg[$this->pgpos]["tag"][$this->parname]=$this->secondprev["word"]; + else + $this->pg[$this->pgpos]["tag"][$this->parname]=$word; + if ($this->parname=="closeon") { + $notexists=array(); + $exists=array(); + $this->ParseCloseOn($this->pg[$this->pgpos]["tag"][$this->parname],&$notexists,&$exists); + $this->pg[$this->pgpos]["tag"][$this->parname]=array(); + $this->pg[$this->pgpos]["tag"][$this->parname]["notin"]=$notexists; + $this->pg[$this->pgpos]["tag"][$this->parname]["in"]=$exists; + } elseif ($this->parname=="tag") + $this->tagname=$this->pg[$this->pgpos]["tag"]["tag"]; + break; + case 2: + if ($this->secondprev["state"]==3) + $this->pg[$this->pgpos]["pars"][$this->parpos][$this->parname]=$this->secondprev["word"]; + else + $this->pg[$this->pgpos]["pars"][$this->parpos][$this->parname]=$word; + break; + } + break; + } + $this->secondprev["state"]=$this->secondstate; + $this->secondprev["word"]=$word; + return true; + } + +/******************************************************************************************** + * Parse closeon structure + ********************************************************************************************/ + function ParseCloseOn($str,$notexists,$exists) { + $arr=explode("|",$str); + if (!is_array($arr)) { + if (!strlen($str)) + return; + else + $arr[]=$str; + } + for ($i=0;$i<sizeof($arr);$i++) { + if ($arr[$i][0]=="!") + $notexists[]=substr($arr[$i],1,strlen($arr[$i])-1); + else + $exists[]=$arr[$i]; + } + } + +/******************************************************************************************** + * Parse grammar + ********************************************************************************************/ + function Parse() { + if ($this->allreadyparsed) return true; + $this->line=1; + while(1) { + $isword=$this->GetWord(&$word); + if (!$isword) $this->iseof=true; + switch (strtolower($word)) { + case "/*"; + $this->incomment*=-1; + break; + case "*/"; + if ($this->incomment!=1) { + $this->SetError(1,"Not found begin of comment operator.",$this->line,$this->column,"Error"); + return; + } + $this->incomment*=-1; + break; + default: + if ($this->incomment==1) break; + if (!$this->ParseFirst($word)) { + $this->SetError(1,"Fatal error",$this->line,$this->column,"Error"); + return false; + } + break; + } + if ($this->iseof) break; + } + if ($this->incomment==1) { + $this->SetError(1,"Not found end of comment operator.",$this->line,$this->column,"Error"); + return false; + } + $this->PrepareGrammar(); + return true; + } +/******************************************************************************************** + * Prepare grammar for future using + ********************************************************************************************/ + function PrepareGrammar() { + $edittagsaftertable=$this->ScanGrammar(); + $l=sizeof($this->pg); + for ($i=0;$i<$l;$i++) { + $this->pg[$this->pg[$i]["tag"]["tag"]]=$this->pg[$i]["tag"]; + if (isset($this->pg[$i]["pars"])) { + $n=sizeof($this->pg[$i]["pars"]); + for ($j=0;$j<$n;$j++) + $this->pg[$this->pg[$i]["tag"]["tag"]]["pars"][$this->pg[$i]["pars"][$j]["par"]]=$this->pg[$i]["pars"][$j]; + } else + $this->pg[$this->pg[$i]["tag"]["tag"]]["pars"]=array(); + unset($this->pg["$i"]); + } + $this->pg["EDIT_TAGS_AFTER_TABLE"]=$edittagsaftertable; + } +/******************************************************************************************** + * Scan grammar for creating edittagsafter table + ********************************************************************************************/ + function ScanGrammar() { + $edittagsaftertable=array(); + for ($i=0;$i<sizeof($this->pg);$i++) + if (isset($this->pg[$i]["tag"]["edittagsafter"])) + if (!in_array($this->pg[$i]["tag"]["edittagsafter"],$edittagsaftertable)) $edittagsaftertable[]=$this->pg[$i]["tag"]["edittagsafter"]; + return $edittagsaftertable; + } +/******************************************************************************************** + * Save precompiled grammar in file + ********************************************************************************************/ + function SaveGrammar($name) { + $str=serialize($this->pg); + if (!$fp=fopen($name,"w")) + print "<br>Error: Can't create file $name. Unable to save grammar.<br>"; + flock($fp,2); + fwrite($fp,$str,strlen($str)); + flock($fp,3); + fclose($fp); + } +} + +} //_ECHOSERVER_HTML_GRAMMARPARSER +?> |
