2 * Author: Alex Kocharin <alex@kocharin.ru>
3 * GIT: https://github.com/rlidwka/jju
4 * License: WTFPL, grab your copy here: http://www.wtfpl.net/txt/copying/
7 // RTFM: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
9 var Uni = require('./unicode')
/**
 * Check whether `x` is a single hexadecimal digit (0-9, A-F, a-f).
 *
 * @param {*} x - candidate character, typically `input[position]`
 *   (may be `undefined` past the end of the input)
 * @returns {boolean} true only for a one-character string in the hex range
 */
function isHexDigit(x) {
  // string comparison is lexicographic, so a longer string like '1z'
  // would otherwise fall inside the '0'..'9' range
  if (typeof x !== 'string' || x.length !== 1) return false

  return (x >= '0' && x <= '9')
      || (x >= 'A' && x <= 'F')
      || (x >= 'a' && x <= 'f')
}
/**
 * Check whether `x` is a single octal digit (0-7).
 *
 * @param {*} x - candidate character, typically `input[position]`
 *   (may be `undefined` past the end of the input)
 * @returns {boolean} true only for a one-character string in '0'..'7'
 */
function isOctDigit(x) {
  // guard against multi-character strings: '10' compares inside '0'..'7'
  if (typeof x !== 'string' || x.length !== 1) return false

  return x >= '0' && x <= '7'
}
/**
 * Check whether `x` is a single decimal digit (0-9).
 *
 * @param {*} x - candidate character, typically `input[position]`
 *   (may be `undefined` past the end of the input)
 * @returns {boolean} true only for a one-character string in '0'..'9'
 */
function isDecDigit(x) {
  // guard against multi-character strings: '10' compares inside '0'..'9'
  if (typeof x !== 'string' || x.length !== 1) return false

  return x >= '0' && x <= '9'
}
/**
 * Build a three-line error report: the message with a 1-based
 * "line:column" suffix, the offending source line, and a marker line
 * with a caret under the failing position.
 *
 * @param {string} input - full text being parsed
 * @param {string} msg - error description
 * @param {number} position - absolute offset of the failing character
 * @param {number} lineno - zero-based line number of `position`
 * @param {number} column - zero-based column of `position`
 * @param {boolean} json5 - selects the JSON5 or the JSON line-terminator test
 * @returns {string} message, source excerpt and underline joined by newlines
 */
function formatError(input, msg, position, lineno, column, json5) {
  var result = msg + ' at ' + (lineno + 1) + ':' + (column + 1)
    , tmppos = position - column - 1
    , srcline = ''
    , underline = ''

  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON

  // output no more than 70 characters before the wrong ones
  if (tmppos < position - 70) {
    tmppos = position - 70
  }

  while (true) {
    var chr = input[++tmppos]

    // ">=" rather than "===": a position beyond the end of input must stop
    // the scan instead of appending the text "undefined" to the excerpt
    if (isLineTerminator(chr) || tmppos >= input.length) {
      if (position >= tmppos) {
        // ending line error, so show it after the last char
        underline += '^'
      }
      break
    }
    srcline += chr

    if (position === tmppos) {
      underline += '^'
    } else if (position > tmppos) {
      // keep tabs so the caret lines up with the source line
      underline += input[tmppos] === '\t' ? '\t' : ' '
    }

    // output no more than 78 characters on the string
    if (srcline.length > 78) break
  }

  return result + '\n' + srcline + '\n' + underline
}
// Core parser: reads input according to options and, when options._tokenize is set,
// reports every token through that callback.
// NOTE(review): several statements of this function are not visible in this view;
// the comments below describe only the lines that are shown.
76 function parse(input, options) {
77 // parse as a standard JSON mode
// Mode selection: options.legacy or mode json, then cjson, then json5 each get a branch.
81 if (options.legacy || options.mode === 'json') {
83 } else if (options.mode === 'cjson') {
85 } else if (options.mode === 'json5') {
// Character classifiers: the json5 flag picks the wider Uni predicates, otherwise the JSON-only ones.
92 var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
93 var isWhiteSpace = json5 ? Uni.isWhiteSpace : Uni.isWhiteSpaceJSON
95 var length = input.length
// Default token hooks do nothing; tokenEnd hands its argument back so it can wrap a return value.
101 var tokenStart = function() {}
102 var tokenEnd = function(v) {return v}
106 type: 'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
107 value: 'number'|'string'|'whatever',
// With a tokenizer callback, the hooks are replaced by versions that track token boundaries.
111 if (options._tokenize) {
114 tokenStart = function() {
// A non-null start here means the previous token was never closed.
115 if (start !== null) throw Error('internal error, token overlap')
119 tokenEnd = function(v, type) {
// Only spans where start differs from position are reported.
120 if (start != position) {
122 raw: input.substr(start, position-start),
// slice(0) gives the callback its own copy of the current stack.
124 stack: stack.slice(0),
// The value field is attached only when a value was passed in.
126 if (v !== undefined) hash.value = v
127 options._tokenize.call(null, hash)
// Error reporting: column is the offset of position from linestart.
136 var column = position - linestart
// Input remains: the default message names the character at position.
139 if (position < length) {
// Serialize the character, drop the outer double quotes, escape single quotes, unescape double quotes.
142 .stringify(input[position])
143 .replace(/^"|"$/g, '')
144 .replace(/'/g, "\\'")
145 .replace(/\\"/g, '"')
148 if (!msg) msg = 'Unexpected token ' + token
// No input left: the default message reports end of input instead.
150 if (!msg) msg = 'Unexpected end of input'
// The SyntaxError carries the formatted report plus 1-based row and column properties.
154 var error = SyntaxError(formatError(input, msg, position, lineno, column, json5))
155 error.row = lineno + 1
156 error.column = column + 1
// Called with a line terminator character chr; the visible line handles the CR LF pair.
160 function newline(chr) {
161 // account for <cr><lf>
// A carriage return directly followed by a line feed counts as one terminator, so step over the line feed.
162 if (chr === '\r' && input[position] === '\n') position++
// Parses one value at the current position, dispatching on its first character.
// Every return goes through tokenEnd, which passes the value back to the caller.
167 function parseGeneric() {
170 while (position < length) {
172 var chr = input[position++]
// Strings: double quotes always, single quotes only with json5.
174 if (chr === '"' || (chr === '\'' && json5)) {
175 return tokenEnd(parseString(chr), 'literal')
// An opening brace is reported as a separator token.
177 } else if (chr === '{') {
178 tokenEnd(undefined, 'separator')
// An opening bracket is reported as a separator token.
181 } else if (chr === '[') {
182 tokenEnd(undefined, 'separator')
// Numbers: a minus sign always qualifies; a plus sign, I or N only with json5.
185 } else if (chr === '-'
188 // + number Infinity NaN
189 || (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
191 return tokenEnd(parseNumber(), 'literal')
// Keyword literals are selected by their first letter: n gives null, t gives true, f gives false.
193 } else if (chr === 'n') {
195 return tokenEnd(null, 'literal')
197 } else if (chr === 't') {
199 return tokenEnd(true, 'literal')
201 } else if (chr === 'f') {
// parseKeyword compares the remaining letters of the word against the input.
202 parseKeyword('false')
203 return tokenEnd(false, 'literal')
// Nothing recognised: undefined is passed through tokenEnd without a token type.
207 return tokenEnd(undefined)
/**
 * Parse an object key at the current position.
 *
 * Recognised forms: a quoted string, a nested object or array, a numeric
 * literal, and - in JSON5 mode only - an identifier, which may begin with
 * a \uXXXX escape. The caller decides which key types are acceptable.
 *
 * @returns {*} the key, or undefined when no key starts here; in that case
 *   `position` is left on the character that was not consumed
 */
function parseKey() {
  if (position >= length) return

  tokenStart()
  var chr = input[position++]

  if (chr === '"' || (chr === '\'' && json5)) {
    return tokenEnd(parseString(chr), 'key')
  }

  if (chr === '{') {
    tokenEnd(undefined, 'separator')
    return parseObject()
  }

  if (chr === '[') {
    tokenEnd(undefined, 'separator')
    return parseArray()
  }

  if (chr === '.' || isDecDigit(chr)) {
    return tokenEnd(parseNumber(true), 'key')
  }

  // The json5 guard must cover BOTH alternatives. Without the parentheses
  // "&&" binds tighter than "||", and a \uXXXX identifier would be accepted
  // as an unquoted key even in strict JSON mode.
  if (json5 && (Uni.isIdentifierStart(chr) || (chr === '\\' && input[position] === 'u'))) {
    // unicode char or a unicode sequence
    var rollback = position - 1
    var result = parseIdentifier()

    if (result === undefined) {
      position = rollback
      return tokenEnd(undefined)
    }
    return tokenEnd(result, 'key')
  }

  // not a key: un-read the character and let the caller deal with it
  position--
  return tokenEnd(undefined)
}
// Consumes whitespace, line terminators and comments, reporting the pieces through
// tokenEnd with the types whitespace, newline and comment.
255 function skipWhiteSpace() {
257 while (position < length) {
258 var chr = input[position++]
// Line terminator: the whitespace seen so far is closed, then the terminator gets its own newline token.
260 if (isLineTerminator(chr)) {
262 tokenEnd(undefined, 'whitespace')
266 tokenEnd(undefined, 'newline')
269 } else if (isWhiteSpace(chr)) {
// A slash followed by a slash or a star opens a comment.
272 } else if (chr === '/'
274 && (input[position] === '/' || input[position] === '*')
277 tokenEnd(undefined, 'whitespace')
// The second comment character is consumed here; skipComment gets true for the star form.
280 skipComment(input[position++] === '*')
281 tokenEnd(undefined, 'comment')
289 return tokenEnd(undefined, 'whitespace')
// Skips the body of a comment whose opening two characters were already consumed.
// multi is true for the star form and false for the double-slash form (see the call in skipWhiteSpace).
292 function skipComment(multi) {
293 while (position < length) {
294 var chr = input[position++]
296 if (isLineTerminator(chr)) {
297 // LineTerminator is an end of singleline comment
299 // let parent function deal with newline
306 } else if (chr === '*' && multi) {
307 // end of multiline comment
// Only a star directly followed by a slash closes the comment.
308 if (input[position] === '/') {
// Reached when the loop ran out of input; the message concerns the multi-line form.
319 fail('Unclosed multiline comment')
// Verifies that the input continues with the remaining letters of keyword.
// The loop starts at index 1 because the caller already matched the first letter.
323 function parseKeyword(keyword) {
324 // keyword[0] is not checked because it should've checked earlier
326 var len = keyword.length
327 for (var i=1; i<len; i++) {
// Mismatch or premature end of input.
328 if (position >= length || keyword[i] != input[position]) {
// Parses the members of an object; parseGeneric dispatches here after the opening brace.
// Reads key, colon, value groups until a closing brace.
336 function parseObject() {
// With options.null_prototype the result has no prototype at all.
337 var result = options.null_prototype ? Object.create(null) : {}
339 , is_non_empty = false
341 while (position < length) {
343 var item1 = parseKey()
346 var chr = input[position++]
347 tokenEnd(undefined, 'separator')
// Closing brace with no pending key: after at least one member this is a trailing comma, rejected outside json5.
349 if (chr === '}' && item1 === undefined) {
350 if (!json5 && is_non_empty) {
352 fail('Trailing comma in object')
// Key followed by a colon: parse the value that belongs to it.
356 } else if (chr === ':' && item1 !== undefined) {
359 var item2 = parseGeneric()
362 if (item2 === undefined) fail('No value found for key ' + item1)
// Keys must be strings; json5 additionally tolerates numeric keys.
363 if (typeof(item1) !== 'string') {
364 if (!json5 || typeof(item1) !== 'number') {
365 fail('Wrong key type: ' + item1)
// Keys that exist on empty_object are treated as reserved unless the policy is replace.
// NOTE(review): empty_object is defined outside this view - presumably a plain object literal; confirm.
369 if ((item1 in empty_object || empty_object[item1] != null) && options.reserved_keys !== 'replace') {
370 if (options.reserved_keys === 'throw') {
371 fail('Reserved key: ' + item1)
373 // silently ignore it
// The reviver, when supplied, may replace the value; it is called with the key and the value.
376 if (typeof(options.reviver) === 'function') {
377 item2 = options.reviver.call(null, item1, item2)
// An undefined value is not stored.
380 if (item2 !== undefined) {
382 Object.defineProperty(result, item1, {
394 var chr = input[position++]
395 tokenEnd(undefined, 'separator')
400 } else if (chr === '}') {
// Parses the elements of an array; parseGeneric dispatches here after the opening bracket.
416 function parseArray() {
419 while (position < length) {
// The index of the upcoming element is pushed on the stack before it is parsed.
421 stack.push(result.length)
422 var item = parseGeneric()
426 var chr = input[position++]
427 tokenEnd(undefined, 'separator')
429 if (item !== undefined) {
// The reviver, when supplied, receives the element index as a string key.
430 if (typeof(options.reviver) === 'function') {
431 item = options.reviver.call(null, String(result.length), item)
// The reviver dropped the element: a placeholder keeps the emptiness checks below from firing.
433 if (item === undefined) {
435 item = true // hack for check below, not included into result
// No element was parsed at this point: elisions are rejected.
442 if (item === undefined) {
443 fail('Elisions are not supported')
446 } else if (chr === ']') {
// Closing bracket right after a comma in a non-empty array: rejected outside json5.
447 if (!json5 && item === undefined && result.length) {
449 fail('Trailing comma in array')
/**
 * Parse a numeric literal. The caller has already consumed its first
 * character, so scanning starts by stepping one position back.
 *
 * Strict JSON accepts only the JSON number grammar. JSON5 mode additionally
 * accepts a leading "+", NaN, Infinity, hexadecimal ("0x1F") and octal
 * literals in both the "0o17"/"0O17" and the legacy "017" spelling.
 *
 * @returns {number} the parsed value; malformed literals are reported
 *   through fail()
 */
function parseNumber() {
  // rewind because we don't know first char
  position--

  var start = position
    , chr = input[position++]

  // Convert input[start, position) to a number and validate it.
  var to_num = function(is_octal) {
    var str = input.substr(start, position - start)
    var result

    if (is_octal) {
      // Strip the "0", "0o" or "0O" marker before the radix-8 conversion.
      // The match has to be case-insensitive: the scanner below accepts an
      // upper-case "O", and leaving it in place makes parseInt return NaN.
      result = parseInt(str.replace(/^0o?/i, ''), 8)
    } else {
      result = Number(str)
    }

    if (Number.isNaN(result)) {
      position--
      fail('Bad numeric literal - "' + input.substr(start, position - start + 1) + '"')
    } else if (!json5 && !str.match(/^-?(0|[1-9][0-9]*)(\.[0-9]+)?(e[+-]?[0-9]+)?$/i)) {
      // additional restrictions imposed by json
      position--
      fail('Non-json numeric literal - "' + input.substr(start, position - start + 1) + '"')
    } else {
      return result
    }
  }

  // ex: -5982475.249875e+29384
  //     ^ skipping this
  if (chr === '-' || (chr === '+' && json5)) chr = input[position++]

  if (chr === 'N' && json5) {
    parseKeyword('NaN')
    return NaN
  }

  if (chr === 'I' && json5) {
    parseKeyword('Infinity')

    // returning +inf or -inf
    return to_num()
  }

  if (chr >= '1' && chr <= '9') {
    // ex: -5982475.249875e+29384
    //        ^^^ skipping these
    while (position < length && isDecDigit(input[position])) position++
    chr = input[position++]
  }

  // special case for leading zero: 0.123456
  if (chr === '0') {
    chr = input[position++]

    //             new syntax, "0o777"           old syntax, "0777"
    var is_octal = chr === 'o' || chr === 'O' || isOctDigit(chr)
    var is_hex = chr === 'x' || chr === 'X'

    if (json5 && (is_octal || is_hex)) {
      while (position < length
         && (is_hex ? isHexDigit : isOctDigit)( input[position] )
      ) position++

      // the sign is applied separately so that to_num() sees a bare literal
      var sign = 1
      if (input[start] === '-') {
        sign = -1
        start++
      } else if (input[start] === '+') {
        start++
      }

      return sign * to_num(is_octal)
    }
  }

  if (chr === '.') {
    // ex: -5982475.249875e+29384
    //                ^^^ skipping these
    while (position < length && isDecDigit(input[position])) position++
    chr = input[position++]
  }

  if (chr === 'e' || chr === 'E') {
    chr = input[position++]
    if (chr === '-' || chr === '+') position++
    // ex: -5982475.249875e+29384
    //                       ^^^ skipping these
    while (position < length && isDecDigit(input[position])) position++
    chr = input[position++]
  }

  // we have char in the buffer, so count for it
  position--
  return to_num()
}
// Reads an identifier name at the current position; parseKey calls this for unquoted keys.
558 function parseIdentifier() {
559 // rewind because we don't know first char
564 while (position < length) {
565 var chr = input[position++]
// An escape needs the letter u followed by exactly four hexadecimal digits.
568 && input[position] === 'u'
569 && isHexDigit(input[position+1])
570 && isHexDigit(input[position+2])
571 && isHexDigit(input[position+3])
572 && isHexDigit(input[position+4])
574 // UnicodeEscapeSequence
// The four hex digits are decoded into the character they denote.
575 chr = String.fromCharCode(parseInt(input.substr(position+1, 4), 16))
580 // identifier started
581 if (Uni.isIdentifierPart(chr)) {
589 if (Uni.isIdentifierStart(chr)) {
// Reads a string literal body up to endChar, the quote character that opened it,
// decoding escape sequences along the way.
600 function parseString(endChar) {
601 // 7.8.4 of ES262 spec
604 while (position < length) {
605 var chr = input[position++]
607 if (chr === endChar) {
610 } else if (chr === '\\') {
// A backslash as the very last character of the input is an error.
611 if (position >= length) fail()
612 chr = input[position++]
// Single-character escapes come from unescapeMap; outside json5 the v and single-quote escapes are excluded.
614 if (unescapeMap[chr] && (json5 || (chr != 'v' && chr != "'"))) {
615 result += unescapeMap[chr]
// json5 only: a backslash directly before a line terminator.
617 } else if (json5 && isLineTerminator(chr)) {
621 } else if (chr === 'u' || (chr === 'x' && json5)) {
622 // unicode/character escape sequence
// The u form takes four hex digits, the x form (json5 only) takes two.
623 var off = chr === 'u' ? 4 : 2
625 // validation for \uXXXX
626 for (var i=0; i<off; i++) {
627 if (position >= length) fail()
628 if (!isHexDigit(input[position])) fail('Bad escape sequence')
632 result += String.fromCharCode(parseInt(input.substr(position-off, off), 16))
// json5 only: octal escapes; three digits are taken only when the first digit is below 4.
633 } else if (json5 && isOctDigit(chr)) {
634 if (chr < '4' && isOctDigit(input[position]) && isOctDigit(input[position+1])) {
637 } else if (isOctDigit(input[position])) {
643 position += digits - 1
644 result += String.fromCharCode(parseInt(input.substr(position-digits, digits), 8))
645 /*if (!isOctDigit(input[position])) {
646 // \0 is allowed still
649 fail('Octal literals are not supported')
661 } else if (isLineTerminator(chr)) {
// Outside json5, characters with a code below 32 are rejected.
665 if (!json5 && chr.charCodeAt(0) < 32) {
667 fail('Unexpected control character')
670 // SourceCharacter but not one of " or \ or LineTerminator
// Top-level driver of parse: read a single value, then decide between a result and an error report.
679 var return_value = parseGeneric()
// Either a value was parsed or unread input remains.
680 if (return_value !== undefined || position < length) {
// Whole input consumed: the reviver, when supplied, gets a final call with an empty key.
683 if (position >= length) {
684 if (typeof(options.reviver) === 'function') {
685 return_value = options.reviver.call(null, '', return_value)
// No value at all: the two messages below tell whitespace-only input apart from empty input.
694 fail('No data, only a whitespace')
696 fail('No data, empty input')
702 * parse(text, options)
704 * parse(text, reviver)
// Public entry point. The second argument is either an options object or,
// for compatibility with JSON.parse, a reviver function.
711 module.exports.parse = function parseJSON(input, options) {
712 // support legacy functions
713 if (typeof(options) === 'function') {
719 if (input === undefined) {
720 // parse(stringify(x)) should be equal x
721 // with JSON functions it is not 'cause of undefined
722 // so we're fixing it
// Non-string input is converted with String before parsing.
727 if (typeof(input) !== 'string') input = String(input)
728 if (options == null) options = {}
// reserved_keys falls back to the ignore policy when unset.
729 if (options.reserved_keys == null) options.reserved_keys = 'ignore'
// Under the throw and ignore policies, null_prototype falls back to true when unset.
731 if (options.reserved_keys === 'throw' || options.reserved_keys === 'ignore') {
732 if (options.null_prototype == null) {
733 options.null_prototype = true
738 return parse(input, options)
740 // jju is a recursive parser, so JSON.parse("{{{{{{{") could blow up the stack
742 // this catch is used to skip all those internal calls
// SyntaxErrors that carry row and column are replaced by a fresh SyntaxError
// with the same message, column and row.
743 if (err instanceof SyntaxError && err.row != null && err.column != null) {
745 err = SyntaxError(old_err.message)
746 err.column = old_err.column
747 err.row = old_err.row
// Tokenizer entry point: installs a token callback on options and runs the regular parse.
// NOTE(review): the definition continues past the last visible line.
753 module.exports.tokenize = function tokenizeJSON(input, options) {
754 if (options == null) options = {}
// Callback handed to the parser; smth is one token record.
756 options._tokenize = function(smth) {
// Entries of options._addstack, when present, are prepended to the token stack.
757 if (options._addstack) smth.stack.unshift.apply(smth.stack, options._addstack)
// The parsed value itself is stored on the data property of tokens.
762 tokens.data = module.exports.parse(input, options)