// MediaWiki:Gadget-Global-WikitextParser.js

/**
 * WikitextParser.js is a set of methods for parsing wikitext
 * Documentation: https://www.mediawiki.org/wiki/WikitextParser.js
 * License: GNU General Public License 3 or later (http://www.gnu.org/licenses/gpl-3.0.html)
 */
// <nowiki>
// globalThis is the window object in a browser, so WikitextParser stays available as a global there,
// while the parser can also be loaded where there is no window (workers, Node.js)
globalThis.WikitextParser = {

	/**
	 * Elements temporarily taken out of a wikitext by replaceElements(),
	 * keyed by the number used in their "@@@<number>@@@" placeholder
	 *
	 * @type {Map<number, string>}
	 */
	replacedElements: new Map(),

	/**
	 * Replace the first occurrence of each given element with a numbered placeholder
	 * and remember the element so that restoreElements() can put it back
	 *
	 * @param {string} wikitext Wikitext containing the elements
	 * @param {string[]} elements Exact substrings to replace (the array is not modified)
	 * @return {string} Wikitext with "@@@<number>@@@" placeholders instead of the elements
	 */
	replaceElements( wikitext, elements ) {
		// Replace the longest elements first, because an element that contains other elements
		// (for example a link with a template inside) must be replaced as a whole,
		// else it would no longer be found once its inner elements became placeholders
		const sorted = [ ...elements ].sort( ( a, b ) => b.length - a.length );
		for ( const element of sorted ) {
			if ( !element || !wikitext.includes( element ) ) {
				continue;
			}
			// The size of the map is not necessarily a free index,
			// because elements may have been restored (and deleted) in any order
			let index = this.replacedElements.size;
			while ( this.replacedElements.has( index ) ) {
				index++;
			}
			const placeholder = '@@@' + index + '@@@';
			wikitext = wikitext.replace( element, () => placeholder );
			this.replacedElements.set( index, element );
		}
		return wikitext;
	},

	/**
	 * Put back the elements previously taken out by replaceElements()
	 *
	 * Restored elements are forgotten. Placeholders that don't correspond
	 * to any remembered element are left untouched.
	 *
	 * @param {string} wikitext Wikitext containing placeholders
	 * @return {string} Wikitext with the original elements
	 */
	restoreElements( wikitext ) {
		// A replacer function is used (rather than a replacement string)
		// so that sequences like "$$" or "$&" in the element are inserted literally
		return wikitext.replace( /@@@(\d+)@@@/g, ( placeholder, digits ) => {
			const index = Number( digits );
			if ( !this.replacedElements.has( index ) ) {
				return placeholder;
			}
			const element = this.replacedElements.get( index );
			this.replacedElements.delete( index );
			return element;
		} );
	},

	/**
	 * Get every balanced element delimited by the given prefix and suffix
	 *
	 * Nested elements are returned too, after the element that contains them.
	 * Elements that are never closed are skipped.
	 *
	 * @param {string} wikitext Wikitext to search
	 * @param {string} prefix Opening delimiter, for example "{{"
	 * @param {string} suffix Closing delimiter, for example "}}"
	 * @return {string[]} Elements, including their delimiters, in order of appearance
	 */
	getElements( wikitext, prefix, suffix ) {
		const elements = [];
		if ( !prefix || !suffix ) {
			return elements; // An empty delimiter would match everywhere
		}
		let start = wikitext.indexOf( prefix );
		while ( start > -1 ) {
			let depth = 0;
			let position = start;
			let end = -1;
			while ( position < wikitext.length ) {
				if ( wikitext.startsWith( prefix, position ) ) {
					depth++;
					position += prefix.length;
				} else if ( wikitext.startsWith( suffix, position ) ) {
					depth--;
					position += suffix.length;
					if ( depth === 0 ) {
						end = position;
						break;
					}
				} else {
					position++;
				}
			}
			if ( end > -1 ) {
				elements.push( wikitext.slice( start, end ) );
			}
			// Continue right after the start of this element, so that nested elements are found too
			start = wikitext.indexOf( prefix, start + 1 );
		}
		return elements;
	},

	/**
	 * Get the wikitext of every template, including nested ones
	 *
	 * @param {string} wikitext
	 * @return {string[]}
	 */
	getTemplates( wikitext ) {
		return this.getElements( wikitext, '{{', '}}' );
	},

	/**
	 * Get the wikitext of every table, including nested ones
	 *
	 * @param {string} wikitext
	 * @return {string[]}
	 */
	getTables( wikitext ) {
		return this.getElements( wikitext, '{|', '|}' );
	},

	/**
	 * Get the wikitext of every comment
	 *
	 * Comments don't nest, so each comment ends at the first "-->" after its start.
	 *
	 * @param {string} wikitext
	 * @return {string[]}
	 */
	getComments( wikitext ) {
		const matches = wikitext.matchAll( /<!--.*?-->/gs );
		return Array.from( matches, ( match ) => match[0] );
	},

	/**
	 * Get the wikitext of every link (including file links), nested ones too
	 *
	 * @param {string} wikitext
	 * @return {string[]}
	 */
	getLinks( wikitext ) {
		return this.getElements( wikitext, '[[', ']]' );
	},

	/**
	 * Get the title a link points to, that is, the part before the first pipe
	 *
	 * @param {string} wikitext Wikitext of a single link, for example "[[Foo|bar]]"
	 * @return {string} Trimmed title, for example "Foo"
	 */
	getLinkTitle( wikitext ) {
		wikitext = wikitext.trim();
		wikitext = wikitext.replace( /^\[\[/, '' );
		wikitext = wikitext.replace( /]]$/, '' );
		return wikitext.split( '|' )[0].trim();
	},

	/**
	 * Get the wikitext of every link to the file namespace
	 *
	 * @param {string} wikitext
	 * @param {string} [namespace] Localized name of the file namespace, if any
	 * @return {string[]} Links whose title starts with "File:", "Image:" or the given namespace (case-insensitive)
	 */
	getFiles( wikitext, namespace ) {
		// "Image" is an alias of the "File" namespace on every wiki
		const prefixes = [ 'file:', 'image:' ];
		if ( namespace ) {
			prefixes.push( namespace.toLowerCase() + ':' );
		}
		return this.getLinks( wikitext ).filter( ( link ) => {
			const title = this.getLinkTitle( link ).toLowerCase();
			return prefixes.some( ( prefix ) => title.startsWith( prefix ) );
		} );
	},

	/**
	 * Get the name of a file, without the namespace
	 *
	 * @param {string} wikitext Wikitext of a single file link, for example "[[File:Foo.png|thumb]]"
	 * @return {string|undefined} File name, for example "Foo.png", or undefined if the title has no namespace
	 */
	getFileName( wikitext ) {
		// A leading colon only turns the embed into a plain link, it's not part of the namespace
		const title = this.getLinkTitle( wikitext ).replace( /^:/, '' );
		const colon = title.indexOf( ':' );
		if ( colon === -1 ) {
			return;
		}
		// Everything after the first colon is the name, even if it contains more colons
		return title.slice( colon + 1 ).trim();
	},

	/**
	 * Get the extension of a file
	 *
	 * @param {string} wikitext Wikitext of a single file link, for example "[[File:Foo.png|thumb]]"
	 * @return {string|undefined} Extension without the dot, for example "png", or undefined if there's none
	 */
	getFileExtension( wikitext ) {
		const name = this.getFileName( wikitext );
		if ( !name ) {
			return;
		}
		const dot = name.lastIndexOf( '.' );
		if ( dot === -1 ) {
			return;
		}
		return name.slice( dot + 1 );
	},

	/**
	 * Helper shared by getFileParameters() and getTemplateParameters()
	 *
	 * @param {string} wikitext Pipe-separated content of a file link or template, without the outer delimiters
	 * @param {string[]} elements Subelements of the wikitext whose pipes and equal signs must be ignored
	 * @return {Object} Map from parameter names to values; anonymous parameters are named "1", "2", etc.
	 */
	parseParameters( wikitext, elements ) {
		// Temporarily replace every subelement that may contain pipes to prevent absolute chaos
		wikitext = this.replaceElements( wikitext, elements );

		const parts = wikitext.split( '|' );
		// The first part is the name of the file or template, so remove it
		// but restore it anyway so that its placeholders don't linger in replacedElements
		this.restoreElements( parts.shift() );

		const params = {};
		let anonymousParams = 0;
		for ( const part of parts ) {
			// Only the first equal sign separates the name from the value,
			// the value itself may contain more (for example in URLs)
			const separator = part.indexOf( '=' );
			let name, value;
			if ( separator === -1 ) {
				anonymousParams++;
				name = anonymousParams.toString();
				value = part;
			} else {
				name = part.slice( 0, separator );
				value = part.slice( separator + 1 );
			}
			name = this.restoreElements( name.trim() );
			value = this.restoreElements( value.trim() );
			params[ name ] = value;
		}
		return params;
	},

	/**
	 * Get the parameters of a file link
	 *
	 * @param {string} wikitext Wikitext of a single file link, for example "[[File:Foo.png|thumb|alt=Bar]]"
	 * @return {Object} Map from parameter names to values; anonymous parameters are named "1", "2", etc.
	 */
	getFileParameters( wikitext ) {
		// Remove the outer square braces
		wikitext = wikitext.trim();
		wikitext = wikitext.replace( /^\[\[/, '' );
		wikitext = wikitext.replace( /]]$/, '' );

		const templates = this.getTemplates( wikitext );
		const links = this.getLinks( wikitext );
		const tables = this.getTables( wikitext );
		return this.parseParameters( wikitext, [ ...templates, ...links, ...tables ] );
	},

	/**
	 * Get the name of a template
	 *
	 * @param {string} templateWikitext Wikitext of a single template, for example "{{Foo|bar}}"
	 * @return {string|undefined} Trimmed name, for example "Foo", or undefined if the wikitext is not a template
	 */
	getTemplateName( templateWikitext ) {
		if ( !this.isTemplate( templateWikitext ) ) {
			return;
		}
		// isTemplate() guarantees the outer curly braces
		const content = templateWikitext.slice( 2, -2 );
		return content.split( '|' )[0].trim();
	},

	/**
	 * Get the names of all the templates in a wikitext, including nested ones
	 *
	 * @param {string} wikitext
	 * @return {Array<string|undefined>} One name per template returned by getTemplates(), in the same order
	 */
	getTemplateNames( wikitext ) {
		return this.getTemplates( wikitext ).map( ( template ) => this.getTemplateName( template ) );
	},

	/**
	 * Get the parameters of a template
	 *
	 * @param {string} templateWikitext Wikitext of a single template, for example "{{Foo|bar|baz=qux}}"
	 * @return {Object|undefined} Map from parameter names to values (anonymous parameters
	 *  are named "1", "2", etc.) or undefined if the wikitext is not a template
	 */
	getTemplateParameters( templateWikitext ) {
		if ( !this.isTemplate( templateWikitext ) ) {
			return;
		}

		// Remove the outer curly braces, guaranteed by isTemplate()
		templateWikitext = templateWikitext.slice( 2, -2 );

		// Comments are set aside too, as there might be commented-out params
		const templates = this.getTemplates( templateWikitext );
		const links = this.getLinks( templateWikitext );
		const tables = this.getTables( templateWikitext );
		const comments = this.getComments( templateWikitext );
		return this.parseParameters( templateWikitext, [ ...templates, ...links, ...tables, ...comments ] );
	},

	/**
	 * Get the wikitext of the first template with the given name
	 *
	 * @param {string} wikitext
	 * @param {string} templateName Name to look for; must match exactly, apart from surrounding spaces in the wikitext
	 * @return {string|undefined} Wikitext of the template, or undefined if there's none
	 */
	getTemplate( wikitext, templateName ) {
		return this.getTemplates( wikitext ).find( ( template ) => this.getTemplateName( template ) === templateName );
	},

	/**
	 * Get the value of an attribute of a tag
	 *
	 * @param {string} tagWikitext Wikitext of a tag, for example '<ref name="foo">bar</ref>'
	 * @param {string} attributeName Name of the attribute (case-insensitive)
	 * @return {string|undefined} Value without quotes, or undefined if the attribute is missing or empty
	 */
	getTagAttribute( tagWikitext, attributeName ) {
		// The name goes into a regular expression, so escape any special characters
		const name = attributeName.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );
		const regexp = new RegExp(
			// Opening tag (not a closing one) up to the attribute
			'<[^/>][^>]*?\\s' + name + '\\s*=\\s*' +
			// Double-quoted, single-quoted or unquoted value
			// An unquoted value ends at the first space or at the end of the tag
			'(?:"([^">]+)"|\'([^\'>]+)\'|([^\\s"\'>]+?)(?=\\s|/?>))' +
			// Rest of the tag
			'[^>]*>',
			'i'
		);
		const match = tagWikitext.match( regexp );
		if ( !match ) {
			return;
		}
		return [ match[1], match[2], match[3] ].find( ( value ) => value !== undefined );
	},

	/**
	 * Get the content of a tag
	 *
	 * @param {string} tagWikitext Wikitext starting with an opening tag, for example '<ref name="foo">bar</ref>'
	 * @return {string|undefined} Everything between the opening tag and the first closing tag
	 *  of the same name, or undefined if there's no such pair (for example in self-closing tags)
	 */
	getTagContent( tagWikitext ) {
		// The content may span several lines and contain other tags
		const match = tagWikitext.match( /^<\s*([^\s\/>]+)(?:\s(?:[^>]*[^\/>])?)?>(.*?)<\/\s*\1\s*>/is );
		if ( match ) {
			return match[2];
		}
	},

	/**
	 * Get the wikitext of every reference with content (self-closing references are not included)
	 *
	 * @todo Generalize to getTag or getTags
	 * @param {string} wikitext
	 * @return {string[]}
	 */
	getReferences( wikitext ) {
		// References often span several lines (for example when they contain citation templates)
		const referenceRegExp = /<\s*ref(?:\s(?:[^>]*[^\/>])?)?>.*?<\s*\/\s*ref\s*>/gis;
		const referenceMatches = wikitext.matchAll( referenceRegExp );
		return Array.from( referenceMatches, ( referenceMatch ) => referenceMatch[0] );
	},

	/**
	 * Get the wikitext of every bulleted or numbered list
	 *
	 * @param {string} wikitext
	 * @return {string[]} Lists, each made of consecutive lines starting with "*" or "#"
	 *  and including the newline that ends the last line
	 */
	getLists( wikitext ) {
		wikitext += "\n\n"; // Append two newlines to match lists at the very end of the wikitext
		// The line that follows the list is only looked ahead, so that it's not part of the match
		const listRegExp = /^[*#].+?(?=^[^*#])/gsm;
		const listMatches = wikitext.matchAll( listRegExp );
		return Array.from( listMatches, ( listMatch ) => listMatch[0] );
	},

	/**
	 * Get the items of a list
	 *
	 * @todo Probably shouldn't split nested items
	 * @param {string} listWikitext Wikitext of a single list
	 * @return {string[]} Items without their leading bullet, each still followed by whatever follows it in the list
	 */
	getListItems( listWikitext ) {
		const listItems = listWikitext.split( /^[*#] */m );
		listItems.shift(); // The first part is whatever comes before the first item
		return listItems;
	},

	/**
	 * Get the wikitext before the first section
	 *
	 * @param {string} wikitext
	 * @return {string} Trimmed wikitext before the first line starting with "=="
	 */
	getLeadSection( wikitext ) {
		wikitext = wikitext.replace( /^==.*/sm, '' );
		wikitext = wikitext.trim();
		return wikitext;
	},

	/**
	 * Get the titles of all the sections of level 2 or deeper
	 *
	 * @param {string} wikitext
	 * @return {string[]} Titles without equal signs or surrounding spaces
	 */
	getSectionTitles( wikitext ) {
		const sectionTitleRegExp = /^==+ *(.+?) *==+/gm;
		const sectionTitleMatches = wikitext.matchAll( sectionTitleRegExp );
		return Array.from( sectionTitleMatches, ( sectionTitleMatch ) => sectionTitleMatch[1] );
	},

	/**
	 * Check if the given wikitext starts with "{{", ends with "}}" and has something in between
	 *
	 * @todo Fails with "{{Foo}}{{Bar}}" but using getTemplates() might be overkill
	 * @param {string} wikitext
	 * @return {boolean}
	 */
	isTemplate( wikitext ) {
		return /^{{.+}}$/s.test( wikitext );
	}
};
// </nowiki>