超強解析XML——簡單直接的來
對于現在越來越輕量級,越來越講究速度和接近用戶的應用來說,xml確實有點復雜了。解析起來不僅耗內存,而且很復雜。這就好像花了幾千塊錢買了個MS Office,但是80%的feature都用不著,還白白的耗著CPU和內存。個人覺得,設置文件用XML其實挺好,因為設置文件一般并不太大,而且要求可讀性強,還有很多亂七八糟的需求,可以利用XML的力量。昨天搞chrome的設置,發現chrome的設置文件也是使用的json,讀起來也是輕松愉快。前陣子做了個程序,需要解析豆瓣API調用返回的XML。真想說一句,豆瓣你別用XML了。至少,提供個json版的API調用吧。(以上謹代表個人觀點)
解析豆瓣返回的xml,實在是不想用DOM這個重量級的玩意。DOM這個玩意,說它強大好還是說它官僚好呢。我傾向于使用SAXP解析。但是現在面臨的一個問題是,我需要根據xml節點的名字和屬性值(一個或者多個)來決定當前的值是不是我想要的。這就麻煩一點點。第一反應是考慮xpath。后來覺得不如自己做一個得了,權當是按需定制一個輕量級的xpath。
首先定義XMLSearchUnit類,這個類的實例用來描述一個需要在XML中搜索的值,值可以是xml節點的值,或者是節點的屬性。
- package com.deepnighttwo.resourceresolver.douban.resolver.utils;
- import java.util.HashMap;
- import java.util.Map;
- import org.xml.sax.Attributes;
/**
 * Describes a single value to look for while walking an XML document.
 * <p>
 * A unit is identified by an element path (format {@code /node_name/node_name/...}),
 * an optional set of attribute name/value pairs the element must carry, and an
 * optional attribute name. When the attribute name is {@code null} (or empty) the
 * wanted value is the element's text; otherwise it is the value of that attribute.
 * <p>
 * {@link #equals(Object)} and {@link #hashCode()} are computed from mutable
 * fields, so a unit must be fully configured <em>before</em> it is used as a key
 * in a hash-based collection and must not be modified afterwards.
 *
 * @author mzang
 */
public class XMLSearchUnit {

    // Attribute name -> value the element must have for this unit to match.
    // May be null if a caller installs null through the setter.
    private Map<String, String> attributeMatchValidation = new HashMap<String, String>();

    // Name of the attribute whose value is wanted; null or empty means the
    // element's text is wanted instead.
    private String expectedAttr;

    // Element path, format: /node_name/node_name/...
    private String xmlPath;

    /**
     * Creates a unit for the given element path with no attribute conditions.
     *
     * @param xmlPath element path in the form {@code /node_name/node_name/...}
     */
    public XMLSearchUnit(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    /**
     * Tells whether the element at {@code path} with the given attributes
     * satisfies this unit: the path must equal this unit's path exactly
     * (case-sensitive) and every configured attribute condition must hold
     * (values are compared ignoring case).
     * <p>
     * A condition whose expected value is {@code null} is satisfied only when
     * the element does not carry that attribute.
     *
     * @param path       path of the current element
     * @param attributes attributes of the current element; may be {@code null},
     *                   in which case only a unit without conditions can match
     * @return {@code true} if both the path and all attribute conditions match
     */
    public boolean match(String path, Attributes attributes) {
        if (xmlPath == null || !xmlPath.equals(path)) {
            return false;
        }
        // No conditions configured: the path alone decides.
        if (attributeMatchValidation == null || attributeMatchValidation.isEmpty()) {
            return true;
        }
        // Conditions exist but there is nothing to check them against.
        if (attributes == null) {
            return false;
        }
        for (Map.Entry<String, String> condition : attributeMatchValidation.entrySet()) {
            String expected = condition.getValue();
            String actual = attributes.getValue(condition.getKey());
            boolean satisfied = (expected == null)
                    ? actual == null
                    : expected.equalsIgnoreCase(actual);
            if (!satisfied) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return the live map of attribute conditions (not a copy); {@code null}
     *         if {@code null} was installed through the setter
     */
    public Map<String, String> getAttributeMatchValidation() {
        return attributeMatchValidation;
    }

    /**
     * Adds (or replaces) one attribute condition.
     *
     * @param key   attribute name the element must carry
     * @param value value that attribute must have (compared ignoring case)
     */
    public void addAttributeValidation(String key, String value) {
        // The setter accepts null, so re-create the map on demand.
        if (attributeMatchValidation == null) {
            attributeMatchValidation = new HashMap<String, String>();
        }
        attributeMatchValidation.put(key, value);
    }

    /**
     * @return the element path this unit is looking for
     */
    public String getXmlPath() {
        return xmlPath;
    }

    /**
     * Replaces all attribute conditions. The given map is stored as-is (not
     * copied).
     *
     * @param attributeMatchValidation attribute name -> required value;
     *                                 {@code null} means "no conditions"
     */
    public void setAttributeMatchValidation(
            Map<String, String> attributeMatchValidation) {
        this.attributeMatchValidation = attributeMatchValidation;
    }

    /**
     * @return name of the attribute whose value is wanted, or {@code null} when
     *         the element's text is wanted
     */
    public String getExpectedAttr() {
        return expectedAttr;
    }

    /**
     * Selects what this unit extracts: set to {@code null} to get the element's
     * text, or to an attribute name to get that attribute's value.
     *
     * @param expectedAttr attribute name, or {@code null} for the element text
     */
    public void setExpectedAttr(String expectedAttr) {
        this.expectedAttr = expectedAttr;
    }

    /**
     * Hash over the attribute conditions, the expected attribute and the path.
     * The value changes whenever any of those fields is modified.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + hashOf(attributeMatchValidation);
        result = prime * result + hashOf(expectedAttr);
        result = prime * result + hashOf(xmlPath);
        return result;
    }

    /**
     * Two units are equal when they are of the same class and have equal
     * attribute conditions, expected attribute and path.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        XMLSearchUnit other = (XMLSearchUnit) obj;
        return sameValue(attributeMatchValidation, other.attributeMatchValidation)
                && sameValue(expectedAttr, other.expectedAttr)
                && sameValue(xmlPath, other.xmlPath);
    }

    // Null-tolerant hash: null contributes 0.
    private static int hashOf(Object value) {
        return (value == null) ? 0 : value.hashCode();
    }

    // Null-tolerant equality: two nulls are equal.
    private static boolean sameValue(Object left, Object right) {
        return (left == null) ? right == null : left.equals(right);
    }
}
這個類比較簡單。就是用一個hashmap保存待匹配的attribute鍵值對,用一個字符串表示期待的attribute name,用一個字符串表示期待的node path。
然后就是如何在SAXP里用到這個類的實例去搜索了。
- package com.deepnighttwo.resourceresolver.douban.resolver.utils;
- import java.io.InputStream;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import javax.xml.parsers.SAXParser;
- import javax.xml.parsers.SAXParserFactory;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- /**
- *
- * SAXP parser working with XMLSearchUnit.
- *
- * @author mzang
- */
- public class DoubanSearchParser extends DefaultHandler {
- // create and initial search units
- public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit(
- "/feed/entry/id");
- public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit(
- "/entry/summary");
- public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit(
- "/entry/title");
- public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit(
- "/entry/db:attribute");
- public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit(
- "/feed/entry/link");
- static {
- DETAILS_LINK_URL_PATH.addAttributeValidation("rel", "alternate");
- DETAILS_LINK_URL_PATH.setExpectedAttr("href");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang", "zh_CN");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name", "aka");
- DETAILS_RATINGE_PATH.setExpectedAttr("average");
- DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");
- }
- // a map to store the XMLSearchUnit and value
- private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>();
- // a counter of search unit. if it is 0, then all search unit finds a match
- // value and the result of the XML will be skipped.
- private int count = 0;
- private StringBuilder path = new StringBuilder();
- private static final String pathSeparater = "/";
- private XMLSearchUnit[] searchUnits;
- List<XMLSearchUnit> foundItems = new ArrayList<XMLSearchUnit>();
- /**
- * constructor, accept XML input stream, 0 or more search unit instances.
- *
- * @param input
- * @param expectedPath
- * @return
- */
- public Map<XMLSearchUnit, String> parseResults(InputStream input,
- XMLSearchUnit... expectedPath) {
- for (XMLSearchUnit search : expectedPath) {
- results.put(search, null);
- }
- searchUnits = expectedPath;
- count = expectedPath.length;
- XMLReader xmlReader = null;
- try {
- SAXParserFactory spfactory = SAXParserFactory.newInstance();
- spfactory.setValidating(false);
- SAXParser saxParser = spfactory.newSAXParser();
- xmlReader = saxParser.getXMLReader();
- xmlReader.setContentHandler(this);
- xmlReader.parse(new InputSource(input));
- } catch (Exception e) {
- System.err.println(e);
- System.exit(1);
- }
- return results;
- }
- private void addToPath(String addPath) {
- path.append(pathSeparater).append(addPath.toLowerCase());
- }
- private void popPath() {
- int index = path.lastIndexOf(pathSeparater);
- // String removedPath = path.substring(index);
- path.delete(index, path.length());
- }
- @Override
- public void startElement(String uri, String localName, String qName,
- Attributes attributes) throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- // update path
- addToPath(qName);
- List<XMLSearchUnit> foundAttrItems = null;
- // check if current node matches search units. if it is a node value
- // search, then store it in a member variable named foundItems because
- // the value of the node is known only when reaches the end of the
- // node.but for attribute search, it value is known here. So then are
- // put in a local variable list named foundAttrItems.
- for (XMLSearchUnit unit : searchUnits) {
- if (unit.match(path.toString(), attributes) == true) {
- if (unit.getExpectedAttr() == null) {
- foundItems.add(unit);
- } else {
- if (foundAttrItems == null) {
- foundAttrItems = new ArrayList<XMLSearchUnit>();
- }
- foundAttrItems.add(unit);
- }
- }
- }
- // if no attribute match, return.
- if (foundAttrItems == null) {
- return;
- }
- // fill search unit value using attribute value. update count.
- for (XMLSearchUnit attrUnit : foundAttrItems) {
- String attrValue = attributes.getValue(attrUnit.getExpectedAttr());
- if (results.get(attrUnit) == null) {
- count--;
- }
- results.put(attrUnit, attrValue);
- count--;
- }
- }
- /**
- * if current node matches, the the node value is useful, store it.
- */
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- if (count == 0) {
- return;
- }
- if (foundItems.size() == 0) {
- return;
- }
- for (XMLSearchUnit unit : foundItems) {
- String content = new String(ch, start, length);
- if (results.get(unit) == null) {
- count--;
- }
- results.put(unit, content);
- }
- }
- @Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- popPath();
- }
- }
原文鏈接:http://www.cnblogs.com/deepnighttwo/archive/2011/03/13/1982748.html
【編輯推薦】