diff options
Diffstat (limited to 'imports/codemirror/mode/xmlpure/xmlpure.js')
-rwxr-xr-x | imports/codemirror/mode/xmlpure/xmlpure.js | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/imports/codemirror/mode/xmlpure/xmlpure.js b/imports/codemirror/mode/xmlpure/xmlpure.js new file mode 100755 index 00000000..6102f51d --- /dev/null +++ b/imports/codemirror/mode/xmlpure/xmlpure.js | |||
@@ -0,0 +1,485 @@ | |||
1 | /** | ||
2 | * xmlpure.js | ||
3 | * | ||
4 | * Building upon and improving the CodeMirror 2 XML parser | ||
5 | * @author: Dror BG (deebug.dev@gmail.com) | ||
6 | * @date: August, 2011 | ||
7 | */ | ||
8 | |||
9 | CodeMirror.defineMode("xmlpure", function(config, parserConfig) { | ||
10 | // constants | ||
// style names returned by the tokenizer functions of this mode
var STYLE_ERROR = "error",
    STYLE_INSTRUCTION = "comment",
    STYLE_COMMENT = "comment",
    STYLE_ELEMENT_NAME = "tag",
    STYLE_ATTRIBUTE = "attribute",
    STYLE_WORD = "string",
    STYLE_TEXT = "atom";

// pseudo tag names used for contexts that are not named XML elements
var TAG_INSTRUCTION = "!instruction",
    TAG_CDATA = "!cdata",
    TAG_COMMENT = "!comment",
    TAG_TEXT = "!text";

// tag names for which pushContext() flags the new context as noIndent
var doNotIndent = {};
doNotIndent[TAG_CDATA] = true;
doNotIndent[TAG_COMMENT] = true;
doNotIndent[TAG_TEXT] = true;
doNotIndent[TAG_INSTRUCTION] = true;
30 | |||
// options
// size of one indentation step, read from the `config` argument of the mode
// factory; pushContext() adds it to the parent context's indent for each
// nested context
var indentUnit = config.indentUnit;
33 | |||
34 | /////////////////////////////////////////////////////////////////////////// | ||
35 | // helper functions | ||
36 | |||
37 | // chain a parser to another parser | ||
/**
 * Install `parser` as the active tokenizer and run it once right away.
 *
 * @param {Object} stream - stream object passed through to `parser`
 * @param {Object} state - mode state; its `tokenize` property is overwritten
 * @param {Function} parser - tokenizer called as parser(stream, state)
 * @returns {*} whatever `parser` returns for this call
 */
function chain(stream, state, parser) {
    // the assignment must happen first: `parser` may itself replace
    // state.tokenize while it runs
    state.tokenize = parser;
    var style = parser(stream, state);
    return style;
}
42 | |||
43 | // parse a block (comment, CDATA or text) | ||
/**
 * Build a tokenizer for a block (comment, CDATA or text) that runs until
 * `terminator` is found.
 *
 * The returned tokenizer consumes the rest of the current line, or stops just
 * after the terminator. When the terminator is found it pops the current
 * context and installs `nextTokenize`; otherwise state.tokenize is left alone.
 *
 * @param {*} style - style returned for every token of the block
 * @param {string|RegExp} terminator - passed as-is to stream.match()
 * @param {Function} nextTokenize - tokenizer installed once the block ends
 * @returns {Function} tokenizer taking (stream, state) and returning `style`
 */
function inBlock(style, terminator, nextTokenize) {
    return function(stream, state) {
        for (;;) {
            if (stream.eol()) {
                // block continues on the next line
                return style;
            }
            if (stream.match(terminator)) {
                // terminator consumed: leave the block's context
                popContext(state);
                state.tokenize = nextTokenize;
                return style;
            }
            stream.next();
        }
    };
}
57 | |||
58 | // go down a level in the document | ||
59 | // (hint: look at who calls this function to know what the contexts are) | ||
/**
 * Push a new context for `tagName` on top of state.context.
 *
 * The new context is flagged noIndent when `tagName` is listed in
 * doNotIndent, or when the parent context is itself flagged noIndent.
 * (The previous code read a `doIndent` property that no context ever
 * carries, so the flag was never inherited.)
 *
 * @param {Object} state - mode state; `context`, `lineNumber` and `indented`
 *                         are read, `context` is replaced
 * @param {string} tagName - element name or one of the "!..." pseudo tag names
 */
function pushContext(state, tagName) {
    var parent = state.context;
    var inherited = Boolean(parent && parent.noIndent);
    // go through Object.prototype so the lookup table itself can never
    // shadow hasOwnProperty
    var listed = Object.prototype.hasOwnProperty.call(doNotIndent, tagName);

    state.context = {
        tagName: tagName,
        prev: parent,
        // the outermost context starts at column 0, each level adds one unit
        indent: parent ? parent.indent + indentUnit : 0,
        lineNumber: state.lineNumber,
        indented: state.indented,
        noIndent: listed || inherited
    };
}
72 | |||
73 | // go up a level in the document | ||
/**
 * Remove the innermost context from the state.
 *
 * @param {Object} state - mode state; `context` is replaced by its `prev`
 * @returns {Object|null} the context that was removed, or null when there
 *                        was no context to remove
 */
function popContext(state) {
    var current = state.context;
    if (!current) {
        // we shouldn't get here - there was no context to pop
        return null;
    }
    state.context = current.prev;
    return current;
}
84 | |||
// return true if the current token is separated from the tokens before it,
// which means either this is the start of the line, or there is at least
// one space or tab character directly before the token;
// otherwise return false
/**
 * Tell whether the current token is set apart from what precedes it.
 *
 * @param {Object} stream - provides sol(), `string` and `start`
 * @returns {boolean} true at the start of the line, or when the character
 *                    just before stream.start is a space or a tab
 */
function isTokenSeparated(stream) {
    var atLineStart = stream.sol();
    if (atLineStart) {
        return atLineStart;
    }
    var before = stream.string.charAt(stream.start - 1);
    return before === " " || before === "\t";
}
94 | |||
95 | /////////////////////////////////////////////////////////////////////////// | ||
96 | // context: document | ||
97 | // | ||
98 | // an XML document can contain: | ||
99 | // - a single declaration (if defined, it must be the very first line) | ||
100 | // - exactly one root element | ||
101 | // @todo try to actually limit the number of root elements to 1 | ||
102 | // - zero or more comments | ||
/**
 * Tokenizer for the document level (outside of any element).
 *
 * Recognised openings are "<?" (processing instruction), "<!--" (comment)
 * and "<" directly followed by something else (element tag). Anything else,
 * including "<" followed by whitespace or the end of the line, marks the
 * rest of the line as an error.
 *
 * @param {Object} stream - stream being tokenized
 * @param {Object} state - mode state; `tokenize` and `context` may be updated
 * @returns {string} style of the consumed token
 */
function parseDocument(stream, state) {
    if (!stream.eat("<")) {
        // only markup is allowed at document level - error on line
        stream.skipToEnd();
        return STYLE_ERROR;
    }

    if (stream.eat("?")) {
        // processing instruction
        pushContext(state, TAG_INSTRUCTION);
        state.tokenize = parseProcessingInstructionStartTag;
        return STYLE_INSTRUCTION;
    }

    if (stream.match("!--")) {
        // new context: comment; come back here once "-->" has been seen
        pushContext(state, TAG_COMMENT);
        var commentTokenizer = inBlock(STYLE_COMMENT, "-->", parseDocument);
        return chain(stream, state, commentTokenizer);
    }

    if (stream.eatSpace() || stream.eol()) {
        // "<" must be followed directly by a name
        stream.skipToEnd();
        return STYLE_ERROR;
    }

    // element
    state.tokenize = parseElementTagName;
    return STYLE_ELEMENT_NAME;
}
128 | |||
129 | /////////////////////////////////////////////////////////////////////////// | ||
130 | // context: XML element start-tag or end-tag | ||
131 | // | ||
132 | // - element start-tag can contain attributes | ||
133 | // - element start-tag may self-close (or start an element block if it doesn't) | ||
134 | // - element end-tag can contain only the tag name | ||
/**
 * Tokenizer for what follows the "<" of an element tag: either the name of a
 * start-tag, or a complete end-tag ("/name", optional spaces, ">").
 *
 * - start-tag name: pushes a context for the element and continues with
 *   parseElement
 * - end-tag: pops the current context; the token is an error when there was
 *   no context or its tagName differs from the end-tag's name
 * - anything else: consumes up to and including the next ">" as an error
 *
 * In the last two cases the next tokenizer is parseDocument when no context
 * remains and parseElementBlock otherwise.
 *
 * (The former trailing `stream.skipToEnd(); return null;` could never run,
 * since every branch above it returns, and has been removed.)
 *
 * @param {Object} stream - stream being tokenized
 * @param {Object} state - mode state; `tokenize` and `context` are updated
 * @returns {string} style of the consumed token
 */
function parseElementTagName(stream, state) {
    // remember where the name starts so it can be cut out of the line
    var startPos = stream.pos;

    if (stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*/)) {
        // element start-tag
        var tagName = stream.string.substring(startPos, stream.pos);
        pushContext(state, tagName);
        state.tokenize = parseElement;
        return STYLE_ELEMENT_NAME;
    }

    if (stream.match(/^\/[a-zA-Z_:][-a-zA-Z0-9_:.]*( )*>/)) {
        // element end-tag: strip the leading "/" and the trailing ">",
        // then any spaces that preceded the ">"
        var endTagName = stream.string.substring(startPos + 1, stream.pos - 1).trim();
        var oldContext = popContext(state);
        state.tokenize = state.context == null ? parseDocument : parseElementBlock;
        if (oldContext == null || endTagName != oldContext.tagName) {
            // the start and end tag names should match - error
            return STYLE_ERROR;
        }
        return STYLE_ELEMENT_NAME;
    }

    // no tag name - error
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
}
165 | |||
/**
 * Tokenizer for the inside of an element start-tag, after the tag name.
 *
 * - "/>" closes the element immediately: its context is popped
 * - ">" ends the start-tag and continues with parseElementBlock
 * - a separated name followed by optional spaces and "=" starts an attribute
 * - anything else is consumed up to and including the next ">" as an error
 *
 * After an error the next tokenizer is chosen the same way as everywhere
 * else in this mode: parseDocument when no context is left, parseElementBlock
 * otherwise. (The previous code named parseDocument in both arms of the
 * conditional, so the test on state.context had no effect.)
 *
 * @param {Object} stream - stream being tokenized
 * @param {Object} state - mode state; `tokenize` and `context` may be updated
 * @returns {string} style of the consumed token
 */
function parseElement(stream, state) {
    if (stream.match(/^\/>/)) {
        // self-closing tag
        popContext(state);
        state.tokenize = state.context == null ? parseDocument : parseElementBlock;
        return STYLE_ELEMENT_NAME;
    }

    if (stream.eat(">")) {
        // end of the start-tag: the element's content follows
        state.tokenize = parseElementBlock;
        return STYLE_ELEMENT_NAME;
    }

    if (isTokenSeparated(stream) && stream.match(/^[a-zA-Z_:][-a-zA-Z0-9_:.]*( )*=/)) {
        // attribute
        state.tokenize = parseAttribute;
        return STYLE_ATTRIBUTE;
    }

    // no other options - this is an error
    state.tokenize = state.context == null ? parseDocument : parseElementBlock;
    stream.eatWhile(/[^>]/);
    stream.eat(">");
    return STYLE_ERROR;
}
187 | |||
188 | /////////////////////////////////////////////////////////////////////////// | ||
189 | // context: attribute | ||
190 | // | ||
191 | // attribute values may contain everything, except: | ||
192 | // - the ending quote (with ' or ") - this marks the end of the value | ||
193 | // - the character "<" - should never appear | ||
194 | // - ampersand ("&") - unless it starts a reference: a string that ends with a semi-colon (";") | ||
195 | // ---> note: this parser is lax in what may be put into a reference string, | ||
196 | // ---> consult http://www.w3.org/TR/REC-xml/#NT-Reference if you want to make it tighter | ||
/**
 * Tokenizer for the opening quote of an attribute value.
 *
 * Reads one character. If it is a single or double quote, it is stored in
 * state.tokParams.quote and parseAttributeValue takes over. Otherwise the
 * rest of the line is marked as an error and parseElement takes over.
 *
 * @param {Object} stream - stream being tokenized
 * @param {Object} state - mode state; `tokenize` is updated, and
 *                         `tokParams.quote` is set on success
 * @returns {string} style of the consumed token
 */
function parseAttribute(stream, state) {
    var quote = stream.next();
    var isQuoteChar = quote === "\"" || quote === "'";

    if (isQuoteChar) {
        // remember which quote opened the value so the matching one closes it
        state.tokParams.quote = quote;
        state.tokenize = parseAttributeValue;
        return STYLE_WORD;
    }

    // attribute must be quoted
    stream.skipToEnd();
    state.tokenize = parseElement;
    return STYLE_ERROR;
}
210 | |||
211 | // @todo: find out whether this attribute value spans multiple lines, | ||
212 | // and if so, push a context for it in order not to indent it | ||
213 | // (or something of the sort..) | ||
214 | function parseAttributeValue(stream, state) { | ||
215 | var ch = ""; | ||
216 | while(!stream.eol()) { | ||
217 | ch = stream.next(); | ||
218 | if(ch == state.tokParams.quote) { | ||
219 | // end quote found | ||
220 | state.tokenize = parseElement; | ||
221 | return STYLE_WORD; | ||
222 | } else if(ch == "<") { | ||
223 | // can't have less-than signs in an attribute value, ever | ||
224 | stream.skipToEnd() | ||
225 | state.tokenize = parseElement; | ||
226 | return STYLE_ERROR; | ||