aboutsummaryrefslogtreecommitdiff
path: root/imports/codemirror/mode/xmlpure/xmlpure.js
diff options
context:
space:
mode:
author: Pushkar Joshi, 2012-02-24 12:08:49 -0800
committer: Pushkar Joshi, 2012-02-24 12:08:49 -0800
commit03ca7a5ed13c25faaa9100bb666e062fd15335e6 (patch)
treec51112223ceb9121cd595a60335eb2795215590f /imports/codemirror/mode/xmlpure/xmlpure.js
parentfcb12cc09eb3cd3b42bd215877ba18f449275b75 (diff)
parent053fc63a2950c7a5ee4ebf98033b64d474a3c46e (diff)
downloadninja-03ca7a5ed13c25faaa9100bb666e062fd15335e6.tar.gz
Merge branch 'pentool' into brushtool
Conflicts: imports/codemirror/mode/scheme/scheme.js js/tools/BrushTool.js
Diffstat (limited to 'imports/codemirror/mode/xmlpure/xmlpure.js')
-rwxr-xr-ximports/codemirror/mode/xmlpure/xmlpure.js485
1 files changed, 485 insertions, 0 deletions
diff --git a/imports/codemirror/mode/xmlpure/xmlpure.js b/imports/codemirror/mode/xmlpure/xmlpure.js
new file mode 100755
index 00000000..6102f51d
--- /dev/null
+++ b/imports/codemirror/mode/xmlpure/xmlpure.js
@@ -0,0 +1,485 @@
1/**
2 * xmlpure.js
3 *
4 * Building upon and improving the CodeMirror 2 XML parser
5 * @author: Dror BG (deebug.dev@gmail.com)
6 * @date: August, 2011
7 */
8
9CodeMirror.defineMode("xmlpure", function(config, parserConfig) {
10 // constants
// style names returned by the tokenizers
// (processing instructions are styled the same way as comments)
var STYLE_ERROR = "error";
var STYLE_INSTRUCTION = "comment";
var STYLE_COMMENT = "comment";
var STYLE_ELEMENT_NAME = "tag";
var STYLE_ATTRIBUTE = "attribute";
var STYLE_WORD = "string";
var STYLE_TEXT = "atom";

// pseudo tag names used for contexts that are not real XML elements
var TAG_INSTRUCTION = "!instruction";
var TAG_CDATA = "!cdata";
var TAG_COMMENT = "!comment";
var TAG_TEXT = "!text";

// contexts whose tag name is listed here are flagged noIndent by pushContext
var doNotIndent = {};
doNotIndent[TAG_CDATA] = true;
doNotIndent[TAG_COMMENT] = true;
doNotIndent[TAG_TEXT] = true;
doNotIndent[TAG_INSTRUCTION] = true;

// options
// indentation step, taken from the editor configuration passed to the mode
var indentUnit = config.indentUnit;
33
34 ///////////////////////////////////////////////////////////////////////////
35 // helper functions
36
37 // chain a parser to another parser
/**
 * Make `parser` the active tokenizer and run it right away on the
 * current stream position.
 *
 * @param {Object} stream   stream handed to the tokenizer
 * @param {Object} state    parser state; its `tokenize` property is replaced
 * @param {Function} parser tokenizer taking (stream, state)
 * @returns {*} whatever `parser` returns
 */
function chain(stream, state, parser) {
    var style;

    // install the tokenizer first, so it is already active while it runs
    state.tokenize = parser;
    style = parser(stream, state);
    return style;
}
42
43 // parse a block (comment, CDATA or text)
/**
 * Build a tokenizer for a block (comment, CDATA or text) that ends with
 * `terminator`.
 *
 * The returned tokenizer consumes the rest of the line, or everything up to
 * and including the terminator when it is found. In the latter case it pops
 * the current context and hands control to `nextTokenize`.
 *
 * @param {string} style          style returned for every token of the block
 * @param {string|RegExp} terminator pattern passed to stream.match()
 * @param {Function} nextTokenize tokenizer installed once the block ends
 * @returns {Function} tokenizer taking (stream, state)
 */
function inBlock(style, terminator, nextTokenize) {
    return function(stream, state) {
        for (; !stream.eol(); stream.next()) {
            if (stream.match(terminator)) {
                // end of block reached: leave its context and switch tokenizer
                popContext(state);
                state.tokenize = nextTokenize;
                return style;
            }
        }
        // line exhausted without a terminator: the block continues
        return style;
    };
}
57
58 // go down a level in the document
59 // (hint: look at who calls this function to know what the contexts are)
/**
 * Push a new context for `tagName` on top of `state.context`.
 *
 * The new context records the tag name, a link to the previous context, an
 * indent one `indentUnit` deeper than the previous context (0 when there is
 * none), the current `state.lineNumber` and `state.indented`, and whether
 * the context must be left unindented.
 *
 * @param {Object} state   parser state; `context` is replaced
 * @param {string} tagName element name, or one of the "!..." pseudo tags
 */
function pushContext(state, tagName) {
    var parent = state.context;

    // A context is not indented when its tag is listed in doNotIndent, or
    // when the enclosing context is itself flagged noIndent. Contexts only
    // ever carry a `noIndent` property; reading `doIndent` here (as the code
    // did before) always yielded undefined, so the flag was never inherited.
    var noIndent = doNotIndent.hasOwnProperty(tagName) || !!(parent && parent.noIndent);

    state.context = {
        tagName: tagName,
        prev: parent,
        indent: parent ? parent.indent + indentUnit : 0,
        lineNumber: state.lineNumber,
        indented: state.indented,
        noIndent: noIndent
    };
}
72
73 // go up a level in the document
/**
 * Leave the current context and make its predecessor current again.
 *
 * @param {Object} state parser state; `context` is replaced by `context.prev`
 * @returns {Object|null} the context that was removed, or null when there
 *                        was no context to pop (which should not happen)
 */
function popContext(state) {
    var current = state.context;

    if (!current) {
        // we shouldn't be here - it means we didn't have a context to pop
        return null;
    }

    state.context = current.prev;
    return current;
}
84
85 // return true if the current token is separated from the tokens before it
86 // which means either this is the start of the line, or there is at least
87 // one space or tab character behind the token
88 // otherwise returns false
/**
 * Tell whether the current token stands apart from what precedes it.
 *
 * @param {Object} stream stream exposing sol(), `string` and `start`
 * @returns {boolean} true at the start of the line, or when the character
 *                    just before the token start is a space or a tab
 */
function isTokenSeparated(stream) {
    // charAt() yields "" for an out-of-range index, which matches neither
    var preceding = stream.string.charAt(stream.start - 1);

    return stream.sol() || preceding == " " || preceding == "\t";
}
94
95 ///////////////////////////////////////////////////////////////////////////
96 // context: document
97 //
98 // an XML document can contain:
99 // - a single declaration (if defined, it must be the very first line)
100 // - exactly one root element
101 // @todo try to actually limit the number of root elements to 1
102 // - zero or more comments
/**
 * Tokenizer for the document level (outside of any element).
 *
 * Only constructs opening with "<" are accepted here: a processing
 * instruction ("<?"), a comment ("<!--") or an element tag. Everything
 * else marks the rest of the line as an error.
 *
 * @param {Object} stream stream for the current line
 * @param {Object} state  parser state; `tokenize` and `context` may change
 * @returns {string} style of the consumed token
 */
function parseDocument(stream, state) {
    if (!stream.eat("<")) {
        // error on line
        stream.skipToEnd();
        return STYLE_ERROR;
    }

    if (stream.eat("?")) {
        // processing instruction
        pushContext(state, TAG_INSTRUCTION);
        state.tokenize = parseProcessingInstructionStartTag;
        return STYLE_INSTRUCTION;
    }

    if (stream.match("!--")) {
        // new context: comment
        pushContext(state, TAG_COMMENT);
        return chain(stream, state, inBlock(STYLE_COMMENT, "-->", parseDocument));
    }

    if (!stream.eatSpace() && !stream.eol()) {
        // element: the tag name is handled by the next tokenizer
        state.tokenize = parseElementTagName;
        return STYLE_ELEMENT_NAME;
    }

    // "<" followed by whitespace or by the end of the line
    stream.skipToEnd();
    return STYLE_ERROR;
}
128
129 ///////////////////////////////////////////////////////////////////////////
130 // context: XML element start-tag or end-tag
131 //
132 // - element start-tag can contain attributes
133 // - element start-tag may self-close (or start an element block if it doesn't)
134 // - element end-tag can contain only the tag name
/**
 * Tokenizer for what follows the "<" of an element tag.
 *
 * - a tag name starts an element start-tag: a context is pushed for it and
 *   parseElement takes over
 * - "/name>" is an element end-tag: the current context is popped, and the
 *   token is an error when its tag name differs from the end-tag name
 * - anything else is an error that extends up to and including the next ">"
 *
 * @param {Object} stream stream for the current line
 * @param {Object} state  parser state; `tokenize` and `context` are updated
 * @returns {string} STYLE_ELEMENT_NAME or STYLE_ERROR
 */
function parseElementTagName(stream, state) {
    // element start-tag: the whole match is the tag name
    var startTag = stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*/);
    if (startTag) {
        pushContext(state, startTag[0]);
        state.tokenize = parseElement;
        return STYLE_ELEMENT_NAME;
    }

    // element end-tag: the name is captured, optional blanks (spaces or
    // tabs, the same separators isTokenSeparated recognizes) may precede ">"
    var endTag = stream.match(/^\/([a-zA-Z_:][-a-zA-Z0-9_:.]*)[ \t]*>/);
    if (endTag) {
        var oldContext = popContext(state);
        state.tokenize = state.context == null ? parseDocument : parseElementBlock;
        if (oldContext == null || endTag[1] != oldContext.tagName) {
            // the start and end tag names should match - error
            return STYLE_ERROR;
        }
        return STYLE_ELEMENT_NAME;
    }

    // no tag name - error
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
}
165
/**
 * Tokenizer for the inside of an element start-tag, after the tag name.
 *
 * Recognizes, in this order:
 * - "/>": self-closing tag; the element's context is popped
 * - ">": end of the start-tag; the element block begins
 * - an attribute name followed by "=" (only when the token is separated
 *   from what precedes it); parseAttribute takes over
 * Anything else is an error that extends up to and including the next ">".
 *
 * @param {Object} stream stream for the current line
 * @param {Object} state  parser state; `tokenize` (and `context` for "/>")
 *                        are updated
 * @returns {string} STYLE_ELEMENT_NAME, STYLE_ATTRIBUTE or STYLE_ERROR
 */
function parseElement(stream, state) {
    if (stream.match(/^\/>/)) {
        // self-closing tag
        popContext(state);
        state.tokenize = state.context == null ? parseDocument : parseElementBlock;
        return STYLE_ELEMENT_NAME;
    }

    if (stream.eat(">")) {
        state.tokenize = parseElementBlock;
        return STYLE_ELEMENT_NAME;
    }

    if (isTokenSeparated(stream) && stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*( )*=/)) {
        // attribute
        state.tokenize = parseAttribute;
        return STYLE_ATTRIBUTE;
    }

    // no other options - this is an error.
    // Recover the same way the other tag tokenizers do: back to the document
    // level when no context is open, otherwise on to the element block.
    // (Both branches used to select parseDocument, which made everything
    // after a malformed start-tag be parsed at document level.)
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
}
187
188 ///////////////////////////////////////////////////////////////////////////
189 // context: attribute
190 //
191 // attribute values may contain everything, except:
192 // - the ending quote (with ' or ") - this marks the end of the value
193 // - the character "<" - should never appear
194 // - ampersand ("&") - unless it starts a reference: a string that ends with a semi-colon (";")
195 // ---> note: this parser is lax in what may be put into a reference string,
196 // ---> consult http://www.w3.org/TR/REC-xml/#NT-Reference if you want to make it tighter
/**
 * Tokenizer for the opening quote of an attribute value.
 *
 * Consumes one character. When it is a single or double quote, the quote
 * is remembered in `state.tokParams.quote` and parseAttributeValue takes
 * over; otherwise the rest of the line is flagged as an error and
 * parseElement becomes the tokenizer again.
 *
 * @param {Object} stream stream for the current line
 * @param {Object} state  parser state; `tokenize` and `tokParams.quote`
 *                        are updated
 * @returns {string} STYLE_WORD or STYLE_ERROR
 */
function parseAttribute(stream, state) {
    var opener = stream.next();
    var isQuote = opener == "\"" || opener == "'";

    if (isQuote) {
        state.tokParams.quote = opener;
        state.tokenize = parseAttributeValue;
        return STYLE_WORD;
    }

    // attribute must be quoted
    stream.skipToEnd();
    state.tokenize = parseElement;
    return STYLE_ERROR;
}
210
211 // @todo: find out whether this attribute value spans multiple lines,
212 // and if so, push a context for it in order not to indent it
213 // (or something of the sort..)
214 function parseAttributeValue(stream, state) {
215 var ch = "";
216 while(!stream.eol()) {
217 ch = stream.next();
218 if(ch == state.tokParams.quote) {
219 // end quote found