第一步,构建trie树,定义node类型:
/**
* created by zhaoyy on 2017/2/7.
*/
interface node {
char value();
boolean exists();
boolean isroot();
node parent();
node childof(char c);
node fail();
void setfail(node node);
void setexists(boolean exists);
void add(node child);
list<node> children();
}
第二步,实现两种node,如果词汇全是可打印的ascii字符,就采用asciinode,否则(比如包含汉字),使用基于hash表的mapnode;这两种node均集成自abstractnode:
/**
* created by zhaoyy on 2017/2/8.
*/
abstract class abstractnode implements node {
private static final char empty = '\0';
private final char value;
private final node parent;
private boolean exists;
private node fail;
public abstractnode(node parent, char value) {
this.parent = parent;
this.value = value;
this.exists = false;
this.fail = null;
}
public abstractnode() {
this(null, empty);
}
private static string tab(int n) {
stringbuilder builder = new stringbuilder();
for (int i = 0; i < n; i++) {
builder.append('\t');
}
return builder.tostring();
}
private static string tostring(node node, int depth) {
stringbuilder builder = new stringbuilder();
string tab = tab(depth);
node fail = node.fail();
node parent = node.parent();
builder
.append(tab)
.append('<')
.append(node.value())
.append(" exists=\"")
.append(node.exists())
.append('"')
.append(" parent=\"")
.append(parent == null ? "null" : parent.value())
.append('"')
.append(" fail=\"")
.append(fail == null ? "null" : fail.value())
.append("\">\r\n");
for (node child : node.children())
builder.append(tostring(child, depth + 1));
builder
.append(tab)
.append("</")
.append(node.value())
.append(">\r\n");
return builder.tostring();
}
@override
public char value() {
return value;
}
@override
public boolean exists() {
return exists;
}
@override
public boolean isroot() {
return value == empty;
}
@override
public node parent() {
return parent;
}
@override
public node fail() {
return fail;
}
@override
public void setfail(node node) {
this.fail = node;
}
@override
public void setexists(boolean exists) {
this.exists = exists;
}
@override
public string tostring() {
return tostring(this, 0);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////
/**
* created by zhaoyy on 2017/2/8.
*/
final class asciinode extends abstractnode implements node {
private static final char from = 32;
private static final char to = 126;
private final node[] children;
public asciinode() {
super();
this.children = new node[to - from + 1];
}
public asciinode(node parent, char value) {
super(parent, value);
this.children = new node[to - from + 1];
}
@override
public node childof(char c) {
if (c >= from && c <= to)
return children[(int) c - from];
else return null;
}
@override
public void add(node child) {
int index = (int) child.value();
children[index - from] = child;
}
@override
public list<node> children() {
list<node> nodes = new arraylist<node>();
for (node child : children)
if (child != null)
nodes.add(child);
return nodes;
}
}
//////////////////////////////////////////////////////////////////////////////////////////////
/**
* created by zhaoyy on 2017/2/8.
*/
final class mapnode extends abstractnode implements node {
private final map<character, node> children;
public mapnode() {
super();
this.children = new hashmap<character, node>();
}
public mapnode(node parent, char value) {
super(parent, value);
this.children = new hashmap<character, node>();
}
@override
public node childof(char c) {
return children.get(c);
}
@override
public void add(node child) {
children.put(child.value(), child);
}
@override
public list<node> children() {
list<node> nodes = new arraylist<node>();
nodes.addall(children.values());
return nodes;
}
}
第三步,
首先定义一个node构造器:
/**
* created by zhaoyy on 2017/2/8.
*/
public interface nodemaker {
node make(node parent, char value);
node makeroot();
}
然后构建ac自动机,实现创建及查找方法:
/**
* created by zhaoyy on 2017/2/7.
*/
public final class wordtable {
private final node root;
private wordtable(collection<? extends charsequence> words, nodemaker maker) {
node root = buildtrie(words, maker);
setfailnode(root);
this.root = root;
}
public static wordtable compile(collection<? extends charsequence> words) {
if (words == null || words.isempty())
throw new illegalargumentexception();
final nodemaker maker;
if (isascii(words))
maker = new nodemaker() {
@override
public node make(node parent, char value) {
return new asciinode(parent, value);
}
@override
public node makeroot() {
return new asciinode();
}
};
else maker = new nodemaker() {
@override
public node make(node parent, char value) {
return new mapnode(parent, value);
}
@override
public node makeroot() {
return new mapnode();
}
};
return new wordtable(words, maker);
}
private static boolean isascii(collection<? extends charsequence> words) {
for (charsequence word : words) {
int len = word.length();
for (int i = 0; i < len; i++) {
int c = (int) word.charat(i);
if (c < 32 || c > 126)
return false;
}
}
return true;
}
private static node buildtrie(collection<? extends charsequence> sequences, nodemaker maker) {
node root = maker.makeroot();
for (charsequence sequence : sequences) {
int len = sequence.length();
node current = root;
for (int i = 0; i < len; i++) {
char c = sequence.charat(i);
node node = current.childof(c);
if (node == null) {
node = maker.make(current, c);
current.add(node);
}
current = node;
if (i == len - 1)
node.setexists(true);
}
}
return root;
}
private static void setfailnode(final node root) {
root.setfail(null);
queue<node> queue = new linkedlist<node>();
queue.add(root);
while (!queue.isempty()) {
node parent = queue.poll();
node temp;
for (node child : parent.children()) {
if (parent.isroot())
child.setfail(root);
else {
temp = parent.fail();
while (temp != null) {
node node = temp.childof(child.value());
if (node != null) {
child.setfail(node);
break;
}
temp = temp.fail();
}
if (temp == null)
child.setfail(root);
}
queue.add(child);
}
}
}
public boolean findanyin(charsequence cs) {
int len = cs.length();
node node = root;
for (int i = 0; i < len; i++) {
node next = node.childof(cs.charat(i));
if (next == null) {
next = node.fail();
if (next == null) {
node = root;
continue;
}
}
if (next.exists())
return true;
}
return false;
}
public list<matchinfo> search(charsequence cs) {
if (cs == null || cs.length() == 0)
return collections.emptylist();
list<matchinfo> result = new arraylist<matchinfo>();
int len = cs.length();
node node = root;
for (int i = 0; i < len; i++) {
node next = node.childof(cs.charat(i));
if (next == null) {
next = node.fail();
if (next == null) {
node = root;
continue;
}
}
if (next.exists()) {
matchinfo info = new matchinfo(i, next);
result.add(info);
node = root;
continue;
}
node = next;
}
return result;
}
@override
public string tostring() {
return root.tostring();
}
}
定义一个保存查找结果的实体:
/**
* created by zhaoyy on 2017/2/7.
*/
public final class matchinfo {
private final int index;
private final string word;
public matchinfo(int index, string word) {
this.index = index;
this.word = word;
}
public matchinfo(int index, node node) {
stringbuilder builder = new stringbuilder();
while (node != null) {
if (!node.isroot())
builder.append(node.value());
node = node.parent();
}
string word = builder.reverse().tostring();
this.index = index + 1 - word.length();
this.word = word;
}
public int getindex() {
return index;
}
public string getword() {
return word;
}
@override
public string tostring() {
return index + ":" + word;
}
}
第四步,调用demo:
public static void main(string[] args) {
list<string> list = arrays.aslist("say", "her", "he", "she", "shr", "alone");
wordtable table = wordtable.compile(list);
system.out.println(table);
system.out.println(table.search("1shesaynothingabouthislivinghimalone"));
}
以下是输出结果:
< exists="false" parent="null" fail="null">
<s exists="false" parent=" " fail=" ">
<a exists="false" parent="s" fail="a">
<y exists="true" parent="a" fail=" ">
</y>
</a>
<h exists="false" parent="s" fail="h">
<e exists="true" parent="h" fail="e">
</e>
<r exists="true" parent="h" fail=" ">
</r>
</h>
</s>
<h exists="false" parent=" " fail=" ">
<e exists="true" parent="h" fail=" ">
<r exists="true" parent="e" fail=" ">
</r>
</e>
</h>
<a exists="false" parent=" " fail=" ">
<l exists="false" parent="a" fail=" ">
<o exists="false" parent="l" fail=" ">
<n exists="false" parent="o" fail=" ">
<e exists="true" parent="n" fail=" ">
</e>
</n>
</o>
</l>
</a>
</ >
[1:she, 4:say, 31:alone]
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
更多java实现ac自动机全文检索示例。