aboutsummaryrefslogtreecommitdiff
path: root/imports/codemirror/mode/pig/pig.js
blob: b7399b43d1143a750a3e4c8fe4ff039bc9c38460 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*
 *	Pig Latin Mode for CodeMirror 2 
 *	@author Prasanth Jayachandran
 *	@link 	https://github.com/prasanthj/pig-codemirror-2
 *  This implementation is adapted from PL/SQL mode in CodeMirror 2.
*/
CodeMirror.defineMode("pig", function(config, parserConfig) {
	var indentUnit = config.indentUnit,
		keywords = parserConfig.keywords,
		builtins = parserConfig.builtins,
		types = parserConfig.types,
		multiLineStrings = parserConfig.multiLineStrings;
	
	var isOperatorChar = /[*+\-%<>=&?:\/!|]/;
	
	function chain(stream, state, f) {
		state.tokenize = f;
		return f(stream, state);
	}
	
	var type;
	function ret(tp, style) {
		type = tp;
		return style;
	}
	
	function tokenComment(stream, state) {
		var isEnd = false;
		var ch;
		while(ch = stream.next()) {
			if(ch == "/" && isEnd) {
				state.tokenize = tokenBase;
				break;
			}
			isEnd = (ch == "*");
		}
		return ret("comment", "comment");
	}
	
	function tokenString(quote) {
		return function(stream, state) {
			var escaped = false, next, end = false;
			while((next = stream.next()) != null) {
				if (next == quote && !escaped) {
					end = true; break;
				}
				escaped = !escaped && next == "\\";
			}
			if (end || !(escaped || multiLineStrings))
				state.tokenize = tokenBase;
			return ret("string", "error");
		};
	}
	
	function tokenBase(stream, state) {
		var ch = stream.next();
		
		// is a start of string?
		if (ch == '"' || ch == "'")
			return chain(stream, state, tokenString(ch));
		// is it one of the special chars
		else if(/[\[\]{}\(\),;\.]/.test(ch))
			return ret(ch);
		// is it a number?
		else if(/\d/.test(ch)) {
			stream.eatWhile(/[\w\.]/);
			return ret("number", "number");
		}
		// multi line comment or operator
		else if (ch == "/") {
			if (stream.eat("*")) {
				return chain(stream, state, tokenComment);
			}
			else {
				stream.eatWhile(isOperatorChar);
				return ret("operator", "operator");
			}
		}
		// single line comment or operator
		else if (ch=="-") {
			if(stream.eat("-")){
				stream.skipToEnd();
				return ret("comment", "comment");
			}
			else {
				stream.eatWhile(isOperatorChar);
				return ret("operator", "operator");
			}
		}
		// is it an operator
		else if (isOperatorChar.test(ch)) {
			stream.eatWhile(isOperatorChar);
			return ret("operator", "operator");
		}
		else {
			// get the while word
			stream.eatWhile(/[\w\$_]/);
			// is it one of the listed keywords?
			if (keywords && keywords.propertyIsEnumerable(stream.current().toUpperCase())) {
				if (stream.eat(")") || stream.eat(".")) {
					//keywords can be used as variables like flatten(group), group.$0 etc..
				}
				else {
					return ("keyword", "keyword");
				}
			}
			// is it one of the builtin functions?
			if (builtins && builtins.propertyIsEnumerable(stream.current().toUpperCase()))
			{
				return ("keyword", "variable-2")
			}
			// is it one of the listed types?
			if (types && types.propertyIsEnumerable(stream.current().toUpperCase()))
				return ("keyword", "variable-3")
			// default is a 'word'
			return ret("word", "pig-word");
		}
	}
	
	// Interface
	return {
		startState: function(basecolumn) {
			return {
				tokenize: tokenBase,
				startOfLine: true
			};
		},
		
		token: function(stream, state) {
			if(stream.eatSpace()) return null;
			var style = state.tokenize(stream, state);
			return style;
		}
	};
});

(function() {
	function keywords(str) {
		var obj = {}, words = str.split(" ");
		for (var i = 0; i < words.length; ++i) obj[words[i]] = true;
 		return obj;
 	}

	// builtin funcs taken from trunk revision 1303237
	var pBuiltins = "ABS ACOS ARITY ASIN ATAN AVG BAGSIZE BINSTORAGE BLOOM BUILDBLOOM CBRT CEIL " 
	+ "CONCAT COR COS COSH COUNT COUNT_STAR COV CONSTANTSIZE CUBEDIMENSIONS DIFF DISTINCT DOUBLEABS "
	+ "DOUBLEAVG DOUBLEBASE DOUBLEMAX DOUBLEMIN DOUBLEROUND DOUBLESUM EXP FLOOR FLOATABS FLOATAVG "
	+ "FLOATMAX FLOATMIN FLOATROUND FLOATSUM GENERICINVOKER INDEXOF INTABS INTAVG INTMAX INTMIN "
	+ "INTSUM INVOKEFORDOUBLE INVOKEFORFLOAT INVOKEFORINT INVOKEFORLONG INVOKEFORSTRING INVOKER "
	+ "ISEMPTY JSONLOADER JSONMETADATA JSONSTORAGE LAST_INDEX_OF LCFIRST LOG LOG10 LOWER LONGABS "
	+ "LONGAVG LONGMAX LONGMIN LONGSUM MAX MIN MAPSIZE MONITOREDUDF NONDETERMINISTIC OUTPUTSCHEMA  "
	+ "PIGSTORAGE PIGSTREAMING RANDOM REGEX_EXTRACT REGEX_EXTRACT_ALL REPLACE ROUND SIN SINH SIZE "
	+ "SQRT STRSPLIT SUBSTRING SUM STRINGCONCAT STRINGMAX STRINGMIN STRINGSIZE TAN TANH TOBAG "
	+ "TOKENIZE TOMAP TOP TOTUPLE TRIM TEXTLOADER TUPLESIZE UCFIRST UPPER UTF8STORAGECONVERTER "; 
	
	// taken from QueryLexer.g
	var pKeywords = "VOID IMPORT RETURNS DEFINE LOAD FILTER FOREACH ORDER CUBE DISTINCT COGROUP "
	+ "JOIN CROSS UNION SPLIT INTO IF OTHERWISE ALL AS BY USING INNER OUTER ONSCHEMA PARALLEL "
	+ "PARTITION GROUP AND OR NOT GENERATE FLATTEN ASC DESC IS STREAM THROUGH STORE MAPREDUCE "
	+ "SHIP CACHE INPUT OUTPUT STDERROR STDIN STDOUT LIMIT SAMPLE LEFT RIGHT FULL EQ GT LT GTE LTE " 
	+ "NEQ MATCHES TRUE FALSE "; 
	
	// data types
	var pTypes = "BOOLEAN INT LONG FLOAT DOUBLE CHARARRAY BYTEARRAY BAG TUPLE MAP "
	
	CodeMirror.defineMIME("text/x-pig", {
	 name: "pig",
	 builtins: keywords(pBuiltins),
	 keywords: keywords(pKeywords),
	 types: keywords(pTypes)
	 });
}());