一道Google的面試題 正則表達(dá)式解析器
一、題目如下:
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.
It has only two meta characters: '.' and '*'.
'.' -- exact one arbitrary character match.
'*' -- zero or more arbitrary character match.
--------------------------
舉個(gè)例子,表達(dá)式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字符串), 'abd'。
二、解法:
解析給定的表達(dá)式
生成對(duì)應(yīng)的 NFA(Nondeterministic Finite Automation)
NFA轉(zhuǎn)化為DFA(Deterministic Finite Automation)
利用DFA判斷輸入字符串
不懂正則表達(dá)式? 參考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 參考http://planning.cs.uiuc.edu/node558.html
不懂DFA?參考http://lambda.uta.edu/cse5317/notes/node8.html
三、相關(guān)代碼:
FiniteAutomata.java 和State.java,隨手寫(xiě)寫(xiě),請(qǐng)選擇性參考。
FiniteAutomata.java
- package parser;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.LinkedList;
- import java.util.Map;
- import java.util.Map.Entry;
- import java.util.Queue;
- import java.util.Set;
- public class FiniteAutomata {
- private State start;
- private final static char[] inputs;
- static {
- inputs = new char[26];
- for (char i = 0; i < 26; i++) {
- inputs[i] = (char) ('a' + i);
- }
- }
- private final char episilon = 0;
- private char stateId;
- public void compile(String pattern) {
- this.stateId = 'A';
- State NFA = toNFA(pattern);
- this.start = convertToDFA(NFA);
- }
- private State convertToDFA(State NFA) {
- Map<Set<State>, State> cache = new HashMap<Set<State>, State>();
- Queue<Set<State>> queue = new LinkedList<Set<State>>();
- // start state of DFA
- Set<State> start = episilon(NFA);
- State starter = nextState();
- cache.put(start, starter);
- queue.offer(start);
- while (!queue.isEmpty()) {
- Set<State> currentEpisilon = queue.poll();
- State current = cache.get(currentEpisilon);
- // create new State
- for (char input : inputs) {
- Set<State> nextEpisilon = move(currentEpisilon, input);
- if (nextEpisilon.isEmpty()) {
- continue;
- }
- State next;
- if (!cache.containsKey(nextEpisilon)) {
- next = nextState();
- cache.put(nextEpisilon, next);
- queue.offer(nextEpisilon);
- } else {
- next = cache.get(nextEpisilon);
- }
- boolean isAccept = containsAcceptState(nextEpisilon);
- next.setAccept(isAccept);
- current.setTransition(input, next);
- }
- }
- return starter;
- }
- private Set<State> move(Set<State> currentEpisilon, char input) {
- Set<State> result = new HashSet<State>();
- for (State s : currentEpisilon) {
- State next = s.transit(input);
- if (next != null) {
- result.add(next);
- }
- }
- return episilon(result);
- }
- private boolean containsAcceptState(Set<State> nextEpisilon) {
- for (State state : nextEpisilon) {
- if (state.isAccept()) {
- return true;
- }
- }
- return false;
- }
- private Set<State> episilon(State s) {
- Set<State> cache = new HashSet<State>();
- cache.add(s);
- return episilon(s, cache);
- }
- private Set<State> episilon(Set<State> states) {
- Set<State> cache = new HashSet<State>();
- for (State s : states) {
- cache.add(s);
- cache.addAll(episilon(s, cache));
- }
- return cache;
- }
- private Set<State> episilon(State s, Set<State> cache) {
- State next = s.transit(episilon);
- if (next != null && !cache.contains(next)) {
- cache.add(next);
- cache.addAll(episilon(s, cache));
- }
- return cache;
- }
- private State toNFA(String pattern) {
- char[] expr = pattern.toCharArray();
- State currentState = nextState();
- State starter = currentState;
- for (char c : expr) {
- if (c == '.') {
- State nextState = nextState();
- // add each transition for all inputs
- for (char i : inputs) {
- currentState.setTransition(i, nextState);
- }
- currentState = nextState;
- } else if (c == '*') {
- State nextState = nextState();
- // add each transition for all inputs
- for (char i : inputs) {
- currentState.setTransition(i, nextState);
- }
- currentState.setTransition(episilon, nextState);
- nextState.setTransition(episilon, currentState);
- currentState = nextState;
- } else if (c >= 'a' && c <= 'z') {
- State nextState = nextState();
- currentState.setTransition(c, nextState);
- currentState = nextState;
- } else {
- throw new RuntimeException("Unrecognized expression");
- }
- }
- currentState.setAccept(true);
- return starter;
- }
- private State nextState() {
- return new State(stateId++);
- }
- public void constructDFA(Map<State, Map<Character, State>> mapping) {
- Iterator<Map.Entry<State, Map<Character, State>>> it = mapping
- .entrySet().iterator();
- while (it.hasNext()) {
- Entry<State, Map<Character, State>> entry = it.next();
- State state = entry.getKey();
- Map<Character, State> transition = entry.getValue();
- state.setTransition(transition);
- }
- }
- public boolean match(String c) {
- char[] candidate = c.toCharArray();
- for (char d : candidate) {
- start = start.transit(d);
- if (start == null) {
- return false;
- }
- }
- return start.isAccept();
- }
- public static String[] patterns = { "abc", "*", "*abc", "*abc", "a*bc",
- "a*bc", "a*", "a*", "a*", "a*", "*abc*", "*****", "...", ".*",
- ".bc*", ".b*c*a", "*", "abc", "*a", "a", ".a*c", "a.*b", "..", "",
- "" };
- public static String[] candidates = { "abc", "abc", "abc", "aaabbbabc",
- "aaabbbabc", "abc", "abc", "a", "aa", "abcdef", "abc", "abc",
- "abc", "abc", "abc", "abca", "", "abcd", "abcd", "", "abc", "abc",
- "abc", "", "abc" };
- public static void main(String[] args) {
- FiniteAutomata fa = new FiniteAutomata();
- for (int i = 0; i < patterns.length; i++) {
- fa.compile(patterns[i]);
- System.out.println(fa.match(candidates[i]));
- }
- }
- }
State.java
- package parser;
- import java.util.HashMap;
- import java.util.Map;
- public class State {
- private boolean accept = false;
- private char id;
- private Map<Character, State> mapping = new HashMap<Character, State>();
- public State(char c) {
- this.id = c;
- }
- public State(char c, boolean isAccept) {
- this.id = c;
- this.accept = isAccept;
- }
- public State transit(char input) {
- return mapping.get(input);
- }
- public void setTransition(char input, State next) {
- this.mapping.put(input, next);
- }
- public void setTransition(Map<Character, State> mapping) {
- this.mapping = mapping;
- }
- public boolean isAccept() {
- return this.accept;
- }
- public void setAccept(boolean b) {
- this.accept = b;
- }
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + id;
- return result;
- }
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- State other = (State) obj;
- if (id != other.id)
- return false;
- return true;
- }
- @Override
- public String toString() {
- return "State [id=" + id + "]";
- }
- }