/**
* WikitextParser.js is a set of methods for parsing wikitext
* Documentation: https://www.mediawiki.org/wiki/WikitextParser.js
* License: GNU General Public License 3 or later (http://www.gnu.org/licenses/gpl-3.0.html)
*/
// <nowiki>
window.WikitextParser = {

	/**
	 * Elements that are currently swapped out of some wikitext,
	 * keyed by the numeric index used in their "@@@index@@@" placeholder.
	 */
	replacedElements: new Map(),

	/**
	 * Replace the first occurrence of each given element with a placeholder
	 *
	 * @param {string} wikitext Wikitext to process
	 * @param {string[]} elements Substrings to hide (templates, links, etc)
	 * @return {string} Wikitext with "@@@index@@@" placeholders
	 */
	replaceElements( wikitext, elements ) {
		// Replace the longest elements first, so an outer element is always
		// hidden as a whole before anything nested inside it is considered
		const sorted = [ ...elements ].sort( ( a, b ) => b.length - a.length );
		for ( const element of sorted ) {
			if ( !element || !wikitext.includes( element ) ) {
				continue;
			}
			// The map shrinks when elements are restored, so its size alone
			// is not a safe index: probe until an unused one is found
			let index = this.replacedElements.size;
			while ( this.replacedElements.has( index ) ) {
				index++;
			}
			wikitext = wikitext.replace( element, '@@@' + index + '@@@' );
			this.replacedElements.set( index, element );
		}
		return wikitext;
	},

	/**
	 * Put back the elements hidden by replaceElements() and forget them
	 *
	 * @param {string} wikitext Wikitext that may contain placeholders
	 * @return {string} Wikitext with every known placeholder restored
	 */
	restoreElements( wikitext ) {
		// A replacer function is used so that "$" sequences inside the
		// restored element are inserted literally
		return wikitext.replace( /@@@(\d+)@@@/g, ( placeholder, digits ) => {
			const index = Number( digits );
			if ( !this.replacedElements.has( index ) ) {
				return placeholder; // Not ours, leave the text untouched
			}
			const element = this.replacedElements.get( index );
			this.replacedElements.delete( index );
			return element;
		} );
	},

	/**
	 * Get every element delimited by the given prefix and suffix,
	 * including the elements nested inside other elements
	 *
	 * An element that is never closed extends to the end of the wikitext.
	 *
	 * @param {string} wikitext Wikitext to search
	 * @param {string} prefix Opening delimiter, for example "{{"
	 * @param {string} suffix Closing delimiter, for example "}}"
	 * @return {string[]} Elements in order of their start position
	 */
	getElements( wikitext, prefix, suffix ) {
		const elements = [];
		if ( !prefix || !suffix ) {
			return elements; // An empty delimiter would never advance
		}
		let start = wikitext.indexOf( prefix );
		while ( start > -1 ) {
			let depth = 0;
			let position;
			for ( position = start; position < wikitext.length; position++ ) {
				if ( wikitext.startsWith( prefix, position ) ) {
					position += prefix.length - 1;
					depth++;
				}
				if ( wikitext.startsWith( suffix, position ) ) {
					position += suffix.length - 1;
					depth--;
				}
				if ( !depth ) {
					break;
				}
			}
			elements.push( wikitext.substring( start, position + 1 ) );
			// Continue right after the opening, so nested elements are found too
			start = wikitext.indexOf( prefix, start + 1 );
		}
		return elements;
	},

	/**
	 * @param {string} wikitext
	 * @return {string[]} Wikitext of every template, nested ones included
	 */
	getTemplates( wikitext ) {
		return this.getElements( wikitext, '{{', '}}' );
	},

	/**
	 * @param {string} wikitext
	 * @return {string[]} Wikitext of every table, nested ones included
	 */
	getTables( wikitext ) {
		return this.getElements( wikitext, '{|', '|}' );
	},

	/**
	 * @param {string} wikitext
	 * @return {string[]} Wikitext of every HTML comment
	 */
	getComments( wikitext ) {
		return this.getElements( wikitext, '<!--', '-->' );
	},

	/**
	 * @param {string} wikitext
	 * @return {string[]} Wikitext of every link, nested ones included
	 */
	getLinks( wikitext ) {
		return this.getElements( wikitext, '[[', ']]' );
	},

	/**
	 * Get the target of a link, that is, the part before the first pipe
	 *
	 * @param {string} wikitext Wikitext of a single link
	 * @return {string} Trimmed link target
	 */
	getLinkTitle( wikitext ) {
		const inner = wikitext.trim().replace( /^\[\[/, '' ).replace( /]]$/, '' );
		return inner.split( '|' )[0].trim();
	},

	/**
	 * Get the links that point to the "File" namespace
	 *
	 * @param {string} wikitext Wikitext to search
	 * @param {string} [namespace] Additional (localized) namespace to accept
	 * @return {string[]} Wikitext of every matching link
	 */
	getFiles( wikitext, namespace ) {
		const prefixes = [ 'file:' ];
		if ( namespace ) {
			prefixes.push( namespace.toLowerCase() + ':' );
		}
		return this.getLinks( wikitext ).filter( ( link ) => {
			const title = this.getLinkTitle( link ).toLowerCase();
			return prefixes.some( ( prefix ) => title.startsWith( prefix ) );
		} );
	},

	/**
	 * Get the file name of a file link, without the namespace
	 *
	 * @param {string} wikitext Wikitext of a single file link
	 * @return {string|undefined} File name, or undefined if the title has no namespace
	 */
	getFileName( wikitext ) {
		// A leading colon only turns the embed into a plain link
		const title = this.getLinkTitle( wikitext ).replace( /^:/, '' );
		const colon = title.indexOf( ':' );
		if ( colon === -1 ) {
			return;
		}
		return title.slice( colon + 1 ).trim();
	},

	/**
	 * Get the extension of a file link
	 *
	 * @param {string} wikitext Wikitext of a single file link
	 * @return {string|undefined} Extension without the dot, an empty string if the
	 *  name has no dot, or undefined if there is no file name at all
	 */
	getFileExtension( wikitext ) {
		const name = this.getFileName( wikitext );
		if ( name === undefined ) {
			return;
		}
		const dot = name.lastIndexOf( '.' );
		return dot === -1 ? '' : name.slice( dot + 1 );
	},

	/**
	 * Get the parameters of a file link
	 *
	 * Parts without "=" are numbered from 1 in order of appearance.
	 *
	 * @param {string} wikitext Wikitext of a single file link
	 * @return {Object} Map from parameter names to values
	 */
	getFileParameters( wikitext ) {
		// Remove the outer square braces
		wikitext = wikitext.trim().replace( /^\[\[/, '' ).replace( /]]$/, '' );

		// Temporarily replace every subelement that may contain pipes to prevent absolute chaos
		const elements = [
			...this.getTemplates( wikitext ),
			...this.getLinks( wikitext ),
			...this.getTables( wikitext )
		];
		wikitext = this.replaceElements( wikitext, elements );

		// Parse the params
		const parts = wikitext.split( '|' );
		// The first part is the file name: restore it only to release its placeholders
		this.restoreElements( parts.shift() );
		const params = {};
		let anonymousParams = 0;
		for ( const part of parts ) {
			// Only the first "=" separates name and value, the rest belong to the value
			const separator = part.indexOf( '=' );
			let param, value;
			if ( separator === -1 ) {
				anonymousParams++;
				param = anonymousParams.toString();
				value = part;
			} else {
				param = part.slice( 0, separator );
				value = part.slice( separator + 1 );
			}
			param = this.restoreElements( param.trim() );
			value = this.restoreElements( value.trim() );
			params[ param ] = value;
		}
		return params;
	},

	/**
	 * Get the name of a template
	 *
	 * @param {string} templateWikitext Wikitext of a single template
	 * @return {string|undefined} Trimmed name, or undefined if not a template
	 */
	getTemplateName( templateWikitext ) {
		if ( !this.isTemplate( templateWikitext ) ) {
			return;
		}
		const inner = templateWikitext.replace( /^{{/, '' ).replace( /}}$/, '' );
		return inner.split( '|' )[0].trim();
	},

	/**
	 * Get the names of all the templates in the given wikitext
	 *
	 * @param {string} wikitext
	 * @return {Array} One entry per template found by getTemplates()
	 */
	getTemplateNames( wikitext ) {
		return this.getTemplates( wikitext ).map( ( template ) => this.getTemplateName( template ) );
	},

	/**
	 * Get the parameters of a template
	 *
	 * Parts without "=" are numbered from 1 in order of appearance.
	 *
	 * @param {string} templateWikitext Wikitext of a single template
	 * @return {Object|undefined} Map from parameter names to values,
	 *  or undefined if the wikitext is not a template
	 */
	getTemplateParameters( templateWikitext ) {
		if ( !this.isTemplate( templateWikitext ) ) {
			return;
		}
		// Remove the outer curly braces
		templateWikitext = templateWikitext.replace( /^{{/, '' ).replace( /}}$/, '' );

		// Temporarily replace every subelement that may contain pipes to prevent absolute chaos
		// Also replace comments, as there might be commented-out params
		const elements = [
			...this.getTemplates( templateWikitext ),
			...this.getLinks( templateWikitext ),
			...this.getTables( templateWikitext ),
			...this.getComments( templateWikitext )
		];
		templateWikitext = this.replaceElements( templateWikitext, elements );

		// Parse the template params
		const parts = templateWikitext.split( '|' );
		// The first part is the template name: restore it only to release its placeholders
		this.restoreElements( parts.shift() );
		const params = {};
		let anonymousParams = 0;
		for ( const part of parts ) {
			// Only the first "=" separates name and value, the rest belong to the value
			const separator = part.indexOf( '=' );
			let paramName, paramValue;
			if ( separator === -1 ) {
				anonymousParams++;
				paramName = anonymousParams.toString();
				paramValue = part;
			} else {
				paramName = part.slice( 0, separator );
				paramValue = part.slice( separator + 1 );
			}
			paramName = this.restoreElements( paramName.trim() );
			paramValue = this.restoreElements( paramValue.trim() );
			params[ paramName ] = paramValue;
		}
		return params;
	},

	/**
	 * Get the wikitext of the first template with the given name
	 *
	 * @param {string} wikitext Wikitext to search
	 * @param {string} templateName Exact name to look for
	 * @return {string|undefined} Template wikitext, or undefined if not found
	 */
	getTemplate( wikitext, templateName ) {
		return this.getTemplates( wikitext ).find(
			( template ) => this.getTemplateName( template ) === templateName
		);
	},

	/**
	 * Get the value of an attribute of an opening tag
	 *
	 * Supports double-quoted, single-quoted and unquoted values.
	 *
	 * @param {string} tagWikitext Wikitext containing the tag
	 * @param {string} attributeName Attribute name, matched case-insensitively
	 * @return {string|undefined} Attribute value, or undefined if not found
	 */
	getTagAttribute( tagWikitext, attributeName ) {
		// The name is inserted in a regular expression, so escape it
		const escapedName = String( attributeName ).replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );
		const regexp = new RegExp(
			// Opening tag (not a closing one) up to the attribute name
			'<[^/>][^>]*?\\s' + escapedName + '\\s*=\\s*' +
			// "value" or 'value' or an unquoted value ending at whitespace or the tag end
			'(?:"([^">]+)"|\'([^\'>]+)\'|([^\\s"\'>]+?)(?=\\s|/?>))' +
			// The tag must be closed
			'[^>]*>',
			'i'
		);
		const match = tagWikitext.match( regexp );
		if ( !match ) {
			return;
		}
		return [ match[1], match[2], match[3] ].find( ( value ) => value !== undefined );
	},

	/**
	 * Get the content between the opening and closing tags
	 *
	 * @param {string} tagWikitext Wikitext starting with the opening tag
	 * @return {string|undefined} Content (may span several lines),
	 *  or undefined if no tag pair is found
	 */
	getTagContent( tagWikitext ) {
		// The "s" flag is needed because the content often spans several lines
		const match = tagWikitext.match( /^<.+?>(.*?)<\/.+?>/s );
		return match ? match[1] : undefined;
	},

	// @todo Generalize to getTag or getTags
	/**
	 * Get every reference that has content, that is, excluding self-closed ones
	 *
	 * @param {string} wikitext Wikitext to search
	 * @return {string[]} Wikitext of every reference, tags included
	 */
	getReferences( wikitext ) {
		// The "s" flag is needed because references often span several lines
		const referenceRegExp = /< *ref(>| [^/]*>).*?< *\/ *ref *>/gis;
		return wikitext.match( referenceRegExp ) || [];
	},

	/**
	 * Get every bulleted or numbered list
	 *
	 * @param {string} wikitext Wikitext to search
	 * @return {string[]} Wikitext of every list, without the trailing newline
	 */
	getLists( wikitext ) {
		wikitext += '\n\n'; // Append two newlines to match lists at the very end of the wikitext
		const lists = [];
		const listRegExp = /^([*#].+?)^[^*#]/gsm;
		for ( const listMatch of wikitext.matchAll( listRegExp ) ) {
			// Only the captured group is the list: the full match also includes
			// the first character of the following line
			lists.push( listMatch[1].replace( /\n$/, '' ) );
		}
		return lists;
	},

	// @todo Probably shouldn't split nested items
	/**
	 * Split a list into its items
	 *
	 * @param {string} listWikitext Wikitext of a single list
	 * @return {string[]} Items without their leading marker
	 */
	getListItems( listWikitext ) {
		// The first chunk is whatever precedes the first marker, so drop it
		return listWikitext.split( /^[*#] */gm ).slice( 1 );
	},

	/**
	 * Get the wikitext before the first section heading
	 *
	 * @param {string} wikitext
	 * @return {string} Trimmed lead section
	 */
	getLeadSection( wikitext ) {
		return wikitext.replace( /^==.*/sm, '' ).trim();
	},

	/**
	 * Get the titles of all the section headings of level 2 or deeper
	 *
	 * @param {string} wikitext
	 * @return {string[]} Titles without the equal signs
	 */
	getSectionTitles( wikitext ) {
		const sectionTitleRegExp = /^==+ *(.+?) *==+/gm;
		return Array.from( wikitext.matchAll( sectionTitleRegExp ), ( match ) => match[1] );
	},

	// @todo Fails with "{{Foo}}{{Bar}}" but using getTemplates() might be overkill
	/**
	 * Check if the given wikitext starts and ends with template braces
	 *
	 * @param {string} wikitext
	 * @return {boolean}
	 */
	isTemplate( wikitext ) {
		return /^{{.+}}$/s.test( wikitext );
	}
};
// </nowiki>