您好,欢迎访问一九零五行业门户网

PHP也可以实现词法分析与自定义语言!

之前项目有一个需求,业务人员使用中文编写一些自定义公式,然后需要我们后台执行将结果返回到界面上,于是就基于有限状态机写了这个词法分析器,比较简单,希望能够抛砖引玉。
一、分析需求
输入中文公式,返回结果,比如:
现有薪资=10000;个税起点=3000;当前年份=2021;如果(当前年份=2022){ 个税起点=5000;}返回 (现有薪资-个税起点) * 0.2;
二、实现需求
最初的想法是使用字符串替换的方式,将中文关键字替换成php的关键字,然后调用eval执行,这样确实也是可以的,但是总觉得不是很美丽,并且不能实现动态解析。就想着自己实现一个简单的词法分析,然后结合ast将词法转换成php代码执行,岂不快哉。当前版本没有用到抽象语法树来生成代码,全部使用字符串拼接。【推荐学习:php视频教程】
<?php

/**
 * class lexer
 * @package sett\oalang
 *
 * A small hand-written lexer for a formula language that uses Chinese (or
 * English) keywords. Source text is split into tokens of the shape
 * ["type" => string, "char" => string, "pos" => int]; convert() then glues the
 * tokens back together as PHP source code.
 *
 * Known limitation (kept for backward compatibility): convert() rewrites every
 * "+" operator into PHP's "." concatenation operator, so "1+2" is emitted as
 * "1.2". Telling arithmetic from concatenation needs type information that a
 * lexer does not have.
 *
 * NOTE(review): the generated code is presumably executed by the caller (e.g.
 * via eval) - treat the formula text as untrusted input and sandbox accordingly.
 */
class lexer
{
    /**
     * Keyword map: source-language keyword => PHP keyword.
     *
     * @var array
     */
    public $keywordlist = [];

    /**
     * Single-character operators recognised by the lexer.
     *
     * @var array
     */
    public $operatorlist = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";",
    ];

    /** @var string Source code being tokenized. */
    private $input;

    /** @var string Character under the cursor, or the $eof sentinel. */
    private $currchar;

    /** @var int Cursor position, counted in characters (not bytes). */
    private $currcharpos = 0;

    /** @var string Sentinel stored in $currchar once the input is exhausted. */
    private $eof = "eof";

    /** @var string Encoding used for all multibyte operations. */
    private $currencode = "utf-8";

    // Token types.
    public const var = "variable";
    public const str = "string";
    public const kw = "keyword";
    public const opr = "operator";
    public const int = "integer";
    public const nil = "null";

    /**
     * lexer constructor.
     *
     * @param string $input source code to tokenize
     */
    public function __construct(string $input)
    {
        $this->input = $input;
        $this->currchar = mb_substr($this->input, $this->currcharpos, 1, $this->currencode);
    }

    /**
     * Install the keyword map (source keyword => PHP keyword).
     *
     * @param array $keywordlist
     */
    public function setkeywordlist($keywordlist)
    {
        $this->keywordlist = $keywordlist;
    }

    /**
     * Tokenize the whole input.
     *
     * A "null" token holding a single space is appended after every keyword so
     * that the converted PHP keeps a separator ("return $x", "if (...)").
     *
     * @return array list of tokens, without the trailing eof token
     * @throws Exception when the input is empty or a string literal is unterminated
     */
    public function parseinput()
    {
        if ($this->input == "") {
            throw new Exception("code can not be empty");
        }

        $tokens = [];
        do {
            $token = $this->nexttoken();
            if ($token["type"] != "eof") {
                $tokens[] = $token;
            }
            if ($token["type"] == static::kw) {
                $tokens[] = $this->maketoken(static::nil, " ");
            }
        } while ($token["type"] != "eof");

        return $tokens;
    }

    /**
     * Read the next token at the cursor and advance past it.
     *
     * @return array
     * @throws Exception when a string literal is not terminated
     */
    public function nexttoken()
    {
        $this->skipblankchar();
        if ($this->currchar === "") {
            $this->currchar = $this->eof;
        }

        if ($this->currchar === $this->eof) {
            return $this->currtoken('eof', $this->currchar);
        }

        // A run of Chinese characters is either a keyword or a variable name.
        if ($this->iscnletter()) {
            $word = $this->matchuntilnextcharisnotcn();
            if ($this->iskeyword($word)) {
                return $this->keywordtoken($word);
            }

            return $this->maketoken(static::var, $word);
        }

        if ($this->isoperator()) {
            return $this->currtoken(static::opr, $this->currchar);
        }

        if ($this->isnumber()) {
            return $this->matchnumber();
        }

        if ($this->isstr()) {
            // matchcompletestr() stops ON the closing quote; currtoken() steps over it.
            return $this->currtoken(static::str, $this->matchcompletestr());
        }

        // Identifier starting with an English letter (may continue with Chinese).
        if ($this->isvar()) {
            $word = $this->matchvar();
            if ($this->iskeyword($word)) {
                // The cursor already sits on the first character after the word,
                // so it must not be advanced again (that would drop a character).
                return $this->keywordtoken($word);
            }

            return $this->maketoken(static::var, $word);
        }

        // Anything unrecognised is passed through as a one-character variable token.
        return $this->currtoken(static::var, $this->currchar);
    }

    /**
     * Consume a run of identifier characters starting at the cursor.
     *
     * @param string $input optional prefix to prepend to the matched word
     * @return string
     */
    private function matchvar(string $input = ""): string
    {
        $word = $input ?: '';
        while ($this->isvar()) {
            $word .= $this->currchar;
            $this->nextchar();
        }

        return $word;
    }

    /**
     * Whether the current character may appear in an identifier.
     *
     * @return bool
     */
    private function isvar(): bool
    {
        return $this->iscnletter() || $this->isenletter();
    }

    /**
     * Skip blank characters (space, tab, CR, LF).
     */
    private function skipblankchar(): void
    {
        while (in_array($this->currchar, [" ", "\t", "\r", "\n"], true)) {
            $this->nextchar();
        }
    }

    /**
     * Build a token at the current position, then advance one character.
     *
     * @param string $type
     * @param string $word
     * @return array
     */
    private function currtoken(string $type, $word): array
    {
        $token = $this->maketoken($type, $word);
        $this->nextchar();

        return $token;
    }

    /**
     * Build a token stamped with the current cursor position.
     *
     * @param string $type
     * @param string $char
     * @return array
     */
    private function maketoken(string $type, string $char): array
    {
        return ["type" => $type, "char" => $char, "pos" => $this->currcharpos];
    }

    /**
     * Build a keyword token for a word that has just been consumed.
     *
     * "pos" is the index of the keyword's last character; the cursor is left
     * untouched on the first character after the keyword.
     *
     * @param string $word
     * @return array
     */
    private function keywordtoken(string $word): array
    {
        return ["type" => static::kw, "char" => $word, "pos" => $this->currcharpos - 1];
    }

    /**
     * Whether the current character is an ASCII letter (a-z or A-Z, inclusive).
     *
     * @return bool
     */
    private function isenletter(): bool
    {
        if ($this->currchar == "" || $this->currchar == $this->eof) {
            return false;
        }

        $ord = mb_ord($this->currchar, $this->currencode);
        if ($ord === false) {
            return false;
        }

        return ($ord >= ord('a') && $ord <= ord('z'))
            || ($ord >= ord('A') && $ord <= ord('Z'));
    }

    /**
     * Whether the current character is a CJK ideograph (U+4E00..U+9FA5).
     *
     * @return bool
     */
    private function iscnletter(): bool
    {
        return preg_match("/^[\x{4e00}-\x{9fa5}]+$/u", $this->currchar) === 1;
    }

    /**
     * Whether the current character is an ASCII digit.
     *
     * @return bool
     */
    private function isnumber(): bool
    {
        return $this->isdigit($this->currchar);
    }

    /**
     * Whether $char is exactly one ASCII digit.
     *
     * @param string $char
     * @return bool
     */
    private function isdigit(string $char): bool
    {
        return preg_match('/^[0-9]$/', $char) === 1;
    }

    /**
     * Consume a complete numeric literal: digits, optionally followed by a
     * fractional part ("." directly followed by at least one digit).
     *
     * The token keeps the "integer" type for backward compatibility. "pos" is
     * the index of the literal's last character, which is what single-digit
     * tokens have always reported.
     *
     * @return array
     */
    private function matchnumber(): array
    {
        $literal = "";
        while ($this->isnumber()) {
            $literal .= $this->currchar;
            $this->nextchar();
        }

        $lookahead = mb_substr($this->input, $this->currcharpos + 1, 1, $this->currencode);
        if ($this->currchar === "." && $this->isdigit($lookahead)) {
            $literal .= ".";
            $this->nextchar();
            while ($this->isnumber()) {
                $literal .= $this->currchar;
                $this->nextchar();
            }
        }

        return ["type" => static::int, "char" => $literal, "pos" => $this->currcharpos - 1];
    }

    /**
     * Whether a string literal starts at the cursor.
     *
     * @return bool
     */
    private function isstr(): bool
    {
        return $this->currchar === "\"";
    }

    /**
     * Consume a double-quoted string literal and return its content.
     *
     * On return the cursor sits on the closing quote. Escape sequences are not
     * supported: the first '"' after the opening quote ends the literal.
     *
     * @return string the text between the quotes ("" when no literal starts here)
     * @throws Exception when the input ends before the closing quote
     */
    private function matchcompletestr(): string
    {
        $content = "";
        if ($this->currchar !== "\"") {
            return $content;
        }

        $this->nextchar(); // step over the opening quote
        while ($this->currchar !== "\"") {
            if ($this->currchar === $this->eof) {
                // Without this guard the loop would never terminate.
                throw new Exception("unterminated string literal");
            }
            $content .= $this->currchar;
            $this->nextchar();
        }

        return $content;
    }

    /**
     * Whether the current character is one of the configured operators.
     *
     * @return bool
     */
    private function isoperator(): bool
    {
        return in_array($this->currchar, $this->operatorlist, true);
    }

    /**
     * Consume a run of Chinese characters starting at the cursor.
     *
     * @return string
     */
    private function matchuntilnextcharisnotcn(): string
    {
        $word = "";
        while ($this->iscnletter()) {
            $word .= $this->currchar;
            $this->nextchar();
        }

        return $word;
    }

    /**
     * Advance the cursor by one character; past the end, $currchar becomes
     * the $eof sentinel.
     *
     * @return void
     */
    private function nextchar(): void
    {
        $this->currcharpos += 1;
        $this->currchar = mb_substr($this->input, $this->currcharpos, 1, $this->currencode);
        if ($this->currchar == "") {
            $this->currchar = $this->eof;
        }
    }

    /**
     * Whether $input is a configured keyword with a non-empty mapping.
     *
     * @param string $input
     * @return bool
     */
    private function iskeyword(string $input): bool
    {
        return ($this->keywordlist[$input] ?? "") !== "";
    }

    /**
     * Concatenate tokens into PHP source code.
     *
     * Keywords are replaced through the keyword map, variables get a "$"
     * prefix, operators go through replace(), and strings are re-quoted with
     * "\" and "$" escaped so their content stays literal in the generated code.
     *
     * @param array $tokens tokens as returned by parseinput()
     * @return string
     */
    public function convert(array $tokens)
    {
        $code = "";
        foreach ($this->lexeriterator($tokens) as $token) {
            switch ($token["type"]) {
                case static::kw:
                    $code .= $this->keywordlist[$token["char"]];
                    break;
                case static::var:
                    $code .= sprintf('$%s', $token["char"]);
                    break;
                case static::opr:
                    $code .= $this->replace($token["char"]);
                    break;
                case static::int:
                    $code .= $token["char"];
                    break;
                case static::str:
                    $code .= sprintf("\"%s\"", $this->escapestr($token["char"]));
                    break;
                default:
                    $code .= $token["char"];
            }
        }

        return $code;
    }

    /**
     * Escape string content for embedding in a double-quoted PHP literal.
     *
     * Backslashes and dollar signs are escaped so the content can neither
     * trigger variable interpolation nor swallow the closing quote.
     *
     * @param string $content
     * @return string
     */
    private function escapestr(string $content): string
    {
        return strtr($content, ["\\" => "\\\\", "$" => "\\$"]);
    }

    /**
     * Map a source operator to its PHP counterpart.
     *
     * "+" becomes "." (string concatenation). This also affects numeric
     * addition - see the class-level note.
     *
     * @param string $char
     * @return string
     */
    private function replace(string $char): string
    {
        return str_replace("+", ".", $char);
    }

    /**
     * Iterate over tokens lazily.
     *
     * @param array $tokens
     * @return \Generator
     */
    private function lexeriterator(array $tokens)
    {
        yield from $tokens;
    }
}
三、如何使用
require __DIR__ . "/vendor/autoload.php";

// A sample program written in the custom language.
$code = <<<eof
姓名="腕豪";
问候="你好啊";
地址=(1+2) * 3;
如果(地址 > 3){
    地址=1;
}否则{
    地址="艾欧尼亚";
}
说话 = ("我"+"爱")+"你";
返回 姓名+年龄;
eof;

$lexer = new lexer($code);

// Define your own keywords: source-language keyword => PHP keyword.
$kwmap = [
    "如果" => "if",
    "否则" => "else",
    "返回" => "return",
    "否则如果" => "elseif",
];
$lexer->setkeywordlist($kwmap);

// Tokenize the source.
$tokens = $lexer->parseinput();

// Turn the tokens into PHP by plain concatenation. You could instead feed them
// to a parser library, build an AST and print PHP from that.
var_dump($lexer->convert($tokens));
生成词
[{ "type": "variable", "char": "姓名", "pos": 2}, { "type": "operator", "char": "=", "pos": 2}, { "type": "string", "char": "腕豪", "pos": 7}, { "type": "operator", "char": ";", "pos": 8}, { "type": "variable", "char": "问候", "pos": 13}, { "type": "operator", "char": "=", "pos": 13}, { "type": "string", "char": "你好啊", "pos": 17}, { "type": "operator", "char": ";", "pos": 18}, { "type": "variable", "char": "地址", "pos": 23}, { "type": "operator", "char": "=", "pos": 23}, { "type": "operator", "char": "(", "pos": 24}, { "type": "integer", "char": "1", "pos": 25}, { "type": "operator", "char": "+", "pos": 26}, { "type": "integer", "char": "2", "pos": 27}, { "type": "operator", "char": ")", "pos": 28}, { "type": "operator", "char": "*", "pos": 30}, { "type": "integer", "char": "3", "pos": 32}, { "type": "operator", "char": ";", "pos": 33}, { "type": "keyword", "char": "如果", "pos": 37}, { "type": "null", "char": " ", "pos": 38}, { "type": "operator", "char": "(", "pos": 38}, { "type": "variable", "char": "地址", "pos": 41}, { "type": "operator", "char": ">", "pos": 42}, { "type": "integer", "char": "3", "pos": 44}, { "type": "operator", "char": ")", "pos": 45}, { "type": "operator", "char": "{", "pos": 46}, { "type": "variable", "char": "地址", "pos": 55}, { "type": "operator", "char": "=", "pos": 55}, { "type": "integer", "char": "1", "pos": 56}, { "type": "operator", "char": ";", "pos": 57}, { "type": "operator", "char": "}", "pos": 60}, { "type": "keyword", "char": "否则", "pos": 62}, { "type": "null", "char": " ", "pos": 63}, { "type": "operator", "char": "{", "pos": 63}, { "type": "variable", "char": "地址", "pos": 72}, { "type": "operator", "char": "=", "pos": 72}, { "type": "string", "char": "艾欧尼亚", "pos": 78}, { "type": "operator", "char": ";", "pos": 79}, { "type": "operator", "char": "}", "pos": 82}, { "type": "variable", "char": "说话", "pos": 87}, { "type": "operator", "char": "=", "pos": 88}, { "type": "operator", "char": "(", "pos": 90}, { "type": "string", "char": 
"我", "pos": 93}, { "type": "operator", "char": "+", "pos": 94}, { "type": "string", "char": "爱", "pos": 97}, { "type": "operator", "char": ")", "pos": 98}, { "type": "operator", "char": "+", "pos": 99}, { "type": "string", "char": "你", "pos": 102}, { "type": "operator", "char": ";", "pos": 103}, { "type": "keyword", "char": "返回", "pos": 107}, { "type": "null", "char": " ", "pos": 108}, { "type": "variable", "char": "姓名", "pos": 111}, { "type": "operator", "char": "+", "pos": 111}, { "type": "variable", "char": "年龄", "pos": 114}, { "type": "operator", "char": ";", "pos": 114}]
输出:
$姓名="腕豪";$问候="你好啊";$地址=(1.2)*3;if ($地址>3){$地址=1;}else {$地址="艾欧尼亚";}$说话=("我"."爱")."你";return $姓名.$年龄;
能执行吗?当然能。还存在一些小bug,不想改了。
四、使用场景
什么,居然有人说没什么用?oa系统总有用到的时候。
以上就是php也可以实现词法分析与自定义语言!的详细内容。
其它类似信息

推荐信息