123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 |
- 'use strict';
- const util = require('./util');
// Fallback settings; validate() layers the caller's options on top of these.
const defaultOptions = {
  // When true, an attribute may appear without any value.
  allowBooleanAttributes: false,
  // Tag names that are never pushed on the open-tag stack (no closing tag expected).
  unpairedTags: [],
};
- //const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
- exports.validate = function (xmlData, options) {
- options = Object.assign({}, defaultOptions, options);
- //xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
- //xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
- //xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
- const tags = [];
- let tagFound = false;
- //indicates that the root tag has been closed (aka. depth 0 has been reached)
- let reachedRoot = false;
- if (xmlData[0] === '\ufeff') {
- // check for byte order mark (BOM)
- xmlData = xmlData.substr(1);
- }
-
- for (let i = 0; i < xmlData.length; i++) {
- if (xmlData[i] === '<' && xmlData[i+1] === '?') {
- i+=2;
- i = readPI(xmlData,i);
- if (i.err) return i;
- }else if (xmlData[i] === '<') {
- //starting of tag
- //read until you reach to '>' avoiding any '>' in attribute value
- let tagStartPos = i;
- i++;
-
- if (xmlData[i] === '!') {
- i = readCommentAndCDATA(xmlData, i);
- continue;
- } else {
- let closingTag = false;
- if (xmlData[i] === '/') {
- //closing tag
- closingTag = true;
- i++;
- }
- //read tagname
- let tagName = '';
- for (; i < xmlData.length &&
- xmlData[i] !== '>' &&
- xmlData[i] !== ' ' &&
- xmlData[i] !== '\t' &&
- xmlData[i] !== '\n' &&
- xmlData[i] !== '\r'; i++
- ) {
- tagName += xmlData[i];
- }
- tagName = tagName.trim();
- //console.log(tagName);
- if (tagName[tagName.length - 1] === '/') {
- //self closing tag without attributes
- tagName = tagName.substring(0, tagName.length - 1);
- //continue;
- i--;
- }
- if (!validateTagName(tagName)) {
- let msg;
- if (tagName.trim().length === 0) {
- msg = "Invalid space after '<'.";
- } else {
- msg = "Tag '"+tagName+"' is an invalid name.";
- }
- return getErrorObject('InvalidTag', msg, getLineNumberForPosition(xmlData, i));
- }
- const result = readAttributeStr(xmlData, i);
- if (result === false) {
- return getErrorObject('InvalidAttr', "Attributes for '"+tagName+"' have open quote.", getLineNumberForPosition(xmlData, i));
- }
- let attrStr = result.value;
- i = result.index;
- if (attrStr[attrStr.length - 1] === '/') {
- //self closing tag
- const attrStrStart = i - attrStr.length;
- attrStr = attrStr.substring(0, attrStr.length - 1);
- const isValid = validateAttributeString(attrStr, options);
- if (isValid === true) {
- tagFound = true;
- //continue; //text may presents after self closing tag
- } else {
- //the result from the nested function returns the position of the error within the attribute
- //in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
- //this gives us the absolute index in the entire xml, which we can use to find the line at last
- return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, attrStrStart + isValid.err.line));
- }
- } else if (closingTag) {
- if (!result.tagClosed) {
- return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' doesn't have proper closing.", getLineNumberForPosition(xmlData, i));
- } else if (attrStr.trim().length > 0) {
- return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' can't have attributes or invalid starting.", getLineNumberForPosition(xmlData, tagStartPos));
- } else if (tags.length === 0) {
- return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' has not been opened.", getLineNumberForPosition(xmlData, tagStartPos));
- } else {
- const otg = tags.pop();
- if (tagName !== otg.tagName) {
- let openPos = getLineNumberForPosition(xmlData, otg.tagStartPos);
- return getErrorObject('InvalidTag',
- "Expected closing tag '"+otg.tagName+"' (opened in line "+openPos.line+", col "+openPos.col+") instead of closing tag '"+tagName+"'.",
- getLineNumberForPosition(xmlData, tagStartPos));
- }
- //when there are no more tags, we reached the root level.
- if (tags.length == 0) {
- reachedRoot = true;
- }
- }
- } else {
- const isValid = validateAttributeString(attrStr, options);
- if (isValid !== true) {
- //the result from the nested function returns the position of the error within the attribute
- //in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
- //this gives us the absolute index in the entire xml, which we can use to find the line at last
- return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, i - attrStr.length + isValid.err.line));
- }
- //if the root level has been reached before ...
- if (reachedRoot === true) {
- return getErrorObject('InvalidXml', 'Multiple possible root nodes found.', getLineNumberForPosition(xmlData, i));
- } else if(options.unpairedTags.indexOf(tagName) !== -1){
- //don't push into stack
- } else {
- tags.push({tagName, tagStartPos});
- }
- tagFound = true;
- }
- //skip tag text value
- //It may include comments and CDATA value
- for (i++; i < xmlData.length; i++) {
- if (xmlData[i] === '<') {
- if (xmlData[i + 1] === '!') {
- //comment or CADATA
- i++;
- i = readCommentAndCDATA(xmlData, i);
- continue;
- } else if (xmlData[i+1] === '?') {
- i = readPI(xmlData, ++i);
- if (i.err) return i;
- } else{
- break;
- }
- } else if (xmlData[i] === '&') {
- const afterAmp = validateAmpersand(xmlData, i);
- if (afterAmp == -1)
- return getErrorObject('InvalidChar', "char '&' is not expected.", getLineNumberForPosition(xmlData, i));
- i = afterAmp;
- }else{
- if (reachedRoot === true && !isWhiteSpace(xmlData[i])) {
- return getErrorObject('InvalidXml', "Extra text at the end", getLineNumberForPosition(xmlData, i));
- }
- }
- } //end of reading tag text value
- if (xmlData[i] === '<') {
- i--;
- }
- }
- } else {
- if ( isWhiteSpace(xmlData[i])) {
- continue;
- }
- return getErrorObject('InvalidChar', "char '"+xmlData[i]+"' is not expected.", getLineNumberForPosition(xmlData, i));
- }
- }
- if (!tagFound) {
- return getErrorObject('InvalidXml', 'Start tag expected.', 1);
- }else if (tags.length == 1) {
- return getErrorObject('InvalidTag', "Unclosed tag '"+tags[0].tagName+"'.", getLineNumberForPosition(xmlData, tags[0].tagStartPos));
- }else if (tags.length > 0) {
- return getErrorObject('InvalidXml', "Invalid '"+
- JSON.stringify(tags.map(t => t.tagName), null, 4).replace(/\r?\n/g, '')+
- "' found.", {line: 1, col: 1});
- }
- return true;
- };
/**
 * Tells whether `char` is one of space, tab, line feed or carriage return.
 * @param {string} char - a single character
 * @returns {boolean}
 */
function isWhiteSpace(char) {
  switch (char) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
/**
 * Reads a processing instruction and skips it.
 *
 * The target name is the text between `i` and the first '?' or whitespace
 * character. A target of exactly `xml` is rejected when that separator lies
 * beyond index 5, i.e. when the declaration is not at the very start.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} i - index of the first character after '<?'
 * @returns {number|object} index of the '>' that ends the instruction, or
 *          `xmlData.length` when no '?>' is found, or an error object built
 *          by getErrorObject for a misplaced XML declaration
 */
function readPI(xmlData, i) {
  const start = i;
  // Only the first separator can delimit the target name, so it is checked once.
  let targetChecked = false;

  for (; i < xmlData.length; i++) {
    const ch = xmlData[i];
    if (ch !== '?' && !isWhiteSpace(ch)) {
      continue;
    }

    if (!targetChecked) {
      targetChecked = true;
      if (i > 5 && xmlData.substring(start, i) === 'xml') {
        return getErrorObject('InvalidXml', 'XML declaration allowed only at the start of the document.', getLineNumberForPosition(xmlData, i));
      }
    }

    if (ch === '?' && xmlData[i + 1] === '>') {
      // Report the position of '>' so the caller resumes right after it.
      return i + 1;
    }
  }

  return i;
}
/**
 * Skips over a comment, a DOCTYPE declaration or a CDATA section.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} i - index of the '!' that follows '<'
 * @returns {number} index of the closing '>' of the construct; `xmlData.length`
 *          when the construct is never closed; `i` unchanged when the text at
 *          `i` is none of the three constructs (or the input is too short)
 */
function readCommentAndCDATA(xmlData, i) {
  const total = xmlData.length;

  // Comment: <!-- ... -->
  if (total > i + 5 && xmlData.startsWith('--', i + 1)) {
    const end = xmlData.indexOf('-->', i + 3);
    return end === -1 ? total : end + 2;
  }

  // DOCTYPE: may nest further '<...>' declarations, so brackets are counted.
  if (total > i + 8 && xmlData.startsWith('DOCTYPE', i + 1)) {
    let depth = 1;
    let pos = i + 8;
    while (pos < total) {
      const ch = xmlData[pos];
      if (ch === '<') {
        depth++;
      } else if (ch === '>' && --depth === 0) {
        break;
      }
      pos++;
    }
    return pos;
  }

  // CDATA: <![CDATA[ ... ]]>
  if (total > i + 9 && xmlData.startsWith('[CDATA[', i + 1)) {
    const end = xmlData.indexOf(']]>', i + 8);
    return end === -1 ? total : end + 2;
  }

  return i;
}
const doubleQuote = '"';
const singleQuote = "'";

/**
 * Collects the raw text from `i` up to the '>' that closes the tag, treating
 * any '>' inside a quoted attribute value as ordinary text.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} i - index at which to start reading
 * @returns {false|{value: string, index: number, tagClosed: boolean}}
 *          `false` when a quote is still open at the end of the input;
 *          otherwise the collected text, the index where reading stopped and
 *          whether a closing '>' was actually found
 */
function readAttributeStr(xmlData, i) {
  const begin = i;
  // The quote character of the value currently being read, '' when outside a value.
  let openQuote = '';
  let tagClosed = false;

  while (i < xmlData.length) {
    const ch = xmlData[i];
    if (ch === doubleQuote || ch === singleQuote) {
      if (openQuote === '') {
        openQuote = ch;
      } else if (openQuote === ch) {
        openQuote = '';
      }
      // The other kind of quote inside a quoted value is plain content.
    } else if (ch === '>' && openQuote === '') {
      tagClosed = true;
      break;
    }
    i++;
  }

  if (openQuote !== '') {
    return false;
  }

  return {
    value: xmlData.substring(begin, i),
    index: i,
    tagClosed: tagClosed
  };
}
/**
 * Selects every attribute-like token, whether valid or invalid.
 * Groups: 1 = leading whitespace, 2 = name, 3 = the '=' with whitespace before it,
 * 4 = the quoted value with whitespace before it, 5 = the quote character, 6 = the value text.
 * Shapes it has to cope with: attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
 */
const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s\\S])*?)\\5)?', 'g');

/**
 * Validates the attribute part of a tag.
 *
 * @param {string} attrStr - text between the tag name and the closing '>' (or '/>')
 * @param {object} options - only `allowBooleanAttributes` is read here
 * @returns {true|object} `true` when every attribute is acceptable, otherwise an
 *          error object from getErrorObject whose `line` field carries the offset
 *          of the offending attribute inside `attrStr` (not a line number)
 */
function validateAttributeString(attrStr, options) {
  const matches = util.getAllMatches(attrStr, validAttrStrRegxp);
  // A Set keeps names such as 'hasOwnProperty' or '__proto__' from colliding
  // with members of Object.prototype, which a plain object used as a map would do.
  const seenNames = new Set();

  for (let i = 0; i < matches.length; i++) {
    const match = matches[i];
    const attrName = match[2];

    if (match[1].length === 0) {
      // No whitespace before the attribute name: a="sd"b="saf"
      return getErrorObject('InvalidAttr', "Attribute '" + attrName + "' has no space in starting.", getPositionFromMatch(match));
    }
    if (match[3] !== undefined && match[4] === undefined) {
      // An '=' that is not followed by a quoted value.
      return getErrorObject('InvalidAttr', "Attribute '" + attrName + "' is without value.", getPositionFromMatch(match));
    }
    if (match[3] === undefined && !options.allowBooleanAttributes) {
      // Stand-alone attribute without '=': ab
      return getErrorObject('InvalidAttr', "boolean attribute '" + attrName + "' is not allowed.", getPositionFromMatch(match));
    }
    if (!validateAttrName(attrName)) {
      return getErrorObject('InvalidAttr', "Attribute '" + attrName + "' is an invalid name.", getPositionFromMatch(match));
    }
    if (seenNames.has(attrName)) {
      return getErrorObject('InvalidAttr', "Attribute '" + attrName + "' is repeated.", getPositionFromMatch(match));
    }
    seenNames.add(attrName);
  }

  return true;
}
/**
 * Validates the numeric part of a character reference (`&#123;` or `&#x1F;`).
 *
 * A leading 'x' switches from decimal to hexadecimal digits. At least one
 * digit is required before the ';', so `&#;` and `&#x;` are rejected.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} i - index of the first character after '&#'
 * @returns {number} index of the terminating ';', or -1 when the reference is malformed
 */
function validateNumberAmpersand(xmlData, i) {
  let digitPattern = /\d/;
  if (xmlData[i] === 'x') {
    i++;
    digitPattern = /[\da-fA-F]/;
  }

  const firstDigit = i;
  for (; i < xmlData.length; i++) {
    if (xmlData[i] === ';') {
      // A ';' right at firstDigit means no digit was present at all.
      return i > firstDigit ? i : -1;
    }
    if (!digitPattern.test(xmlData[i])) {
      break;
    }
  }
  return -1;
}
/**
 * Validates an entity or character reference starting at '&'.
 * See https://www.w3.org/TR/xml/#dt-charref
 *
 * Numeric references ('&#...') are delegated to validateNumberAmpersand.
 * A named reference must consist of 1 to 20 word characters followed by ';'.
 * Input that ends before the ';' is reached is rejected.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} i - index of the '&'
 * @returns {number} index of the terminating ';', or -1 when the reference is malformed
 */
function validateAmpersand(xmlData, i) {
  const wordChar = /\w/;

  i++;
  if (xmlData[i] === ';') {
    // '&;' has no name at all.
    return -1;
  }
  if (xmlData[i] === '#') {
    return validateNumberAmpersand(xmlData, i + 1);
  }

  let count = 0;
  for (; i < xmlData.length; i++, count++) {
    if (count < 20 && wordChar.test(xmlData[i])) {
      continue;
    }
    if (xmlData[i] === ';') {
      return i;
    }
    return -1;
  }

  // The input ended before a ';' was seen, so the reference is unterminated.
  return -1;
}
/**
 * Builds the error value handed back by the validation functions.
 *
 * @param {string} code - error category, e.g. 'InvalidTag'
 * @param {string} message - human-readable description
 * @param {number|{line: number, col: number}} lineNumber - either a position
 *        object or a bare number; a bare number is stored as `line` and leaves
 *        `col` undefined
 * @returns {{err: {code: string, msg: string, line: *, col: *}}}
 */
function getErrorObject(code, message, lineNumber) {
  const line = lineNumber.line || lineNumber;
  const { col } = lineNumber;
  return { err: { code, msg: message, line, col } };
}
/**
 * Checks an attribute name by delegating to util.isName.
 * @param {string} attrName
 * @returns {*} whatever util.isName returns for the name
 */
function validateAttrName(attrName) {
  const verdict = util.isName(attrName);
  return verdict;
}
/**
 * Checks a tag name by delegating to util.isName.
 * Names beginning with "xml" are not rejected here.
 * @param {string} tagname
 * @returns {*} whatever util.isName returns for the name
 */
function validateTagName(tagname) {
  const verdict = util.isName(tagname);
  return verdict;
}
/**
 * Converts a character index into a 1-based line and column.
 *
 * Only '\n' (optionally preceded by '\r') ends a line; a '\r' that is not
 * followed by '\n' within the examined prefix counts as a normal column.
 *
 * @param {string} xmlData - the full XML text
 * @param {number} index - character index into `xmlData`
 * @returns {{line: number, col: number}}
 */
function getLineNumberForPosition(xmlData, index) {
  const before = xmlData.substring(0, index);

  let line = 1;
  let lastBreak = -1;
  for (let pos = before.indexOf('\n'); pos !== -1; pos = before.indexOf('\n', pos + 1)) {
    line++;
    lastBreak = pos;
  }

  // Characters after the last line feed, plus one because columns start at 1.
  return { line, col: before.length - lastBreak };
}
/**
 * Gives the offset, within the attribute string, of the first character of
 * the matched attribute name (the match start plus its leading whitespace).
 *
 * @param {Array} match - a match carrying a numeric `startIndex` property and
 *        the leading whitespace in element 1
 * @returns {number}
 */
function getPositionFromMatch(match) {
  const leadingSpace = match[1];
  const matchStart = match.startIndex;
  return matchStart + leadingSpace.length;
}
|