/**
* Copyright (c) 2017-2025 Derk-Jan Hartman [[User:TheDJ]]
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* This script provides import and export functionality of CSV and XLSX files
* into Commons' tabular data sets.
*
* Known issues:
* - You always need to make sure your file has a header
* - Types of imported data are a bit of guess work
* - The localized type gets exported as the first entry for the csv/tsv
*
* @param $
* @param mw
*/
( function ( $, mw ) {
const papaScript = mw.loader.getScript( 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/PapaParse/5.4.1/papaparse.min.js' );
const xlsxScript = mw.loader.getScript( 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/xlsx/0.18.5/xlsx.full.min.js' );
const deepCopy = ( object ) => $.extend( true, {}, object );
const deepCopyArray = ( array ) => $.extend( true, [], array );
const addKeysToSet = ( set, obj ) => {
Object.keys( obj ).forEach( ( key ) => set.add( key ) );
};
const isNonNullObject = ( value ) => typeof value === 'object' && value !== null;
function saveAs( blob, filename ) {
const url = URL.createObjectURL( blob );
const a = document.createElement( 'a' );
a.style.display = 'none';
a.href = url;
a.download = filename || 'download';
document.body.appendChild( a );
// this makes this function only work when the wrapping code executes immediately
a.click();
setTimeout( () => {
document.body.removeChild( a );
URL.revokeObjectURL( url );
}, 1000 );
}
function Workbook() {
if ( !( this instanceof Workbook ) ) {
return new Workbook();
}
this.SheetNames = [];
this.Sheets = {};
}
const i18n = {
'gadget-tabular-import-placeholder': 'A .csv or .xlsx file to import',
'gadget-tabular-export-csv': 'Export to CSV',
'gadget-tabular-export-excel': 'Export to Excel',
'gadget-tabular-unsupported-file': 'Unsupported file type. Please select either a .csv or .xlsx file.',
'gadget-tabular-import-failed': 'Unable to import file: $1',
'gadget-tabular-description-placeholder': 'Please enter a description',
'gadget-tabular-importing-summary': 'Importing data $1',
'gadget-tabular-imported-from': 'Imported from file $1 $2'
};
class TabularImportExport {
constructor() {
this.selectFileWidget = null;
this.api = null;
}
install() {
this.api = new mw.Api();
if ( [ 'edit', 'submit' ].includes( mw.config.get( 'wgAction' ) ) ) {
this.selectFileWidget = new OO.ui.SelectFileInputWidget( { placeholder: mw.msg( 'gadget-tabular-import-placeholder' ) } );
this.selectFileWidget.on( 'change', this.selectFile.bind( this ) );
$( '#editform:not([readonly])' ).before( this.selectFileWidget.$element );
} else if ( mw.config.get( 'wgAction' ) === 'view' ) {
const button1 = new OO.ui.ButtonWidget( { label: mw.msg( 'gadget-tabular-export-csv' ) } );
button1.on( 'click', () => this.fetchData( 'csv' ) );
const button2 = new OO.ui.ButtonWidget( { label: mw.msg( 'gadget-tabular-export-excel' ) } );
button2.on( 'click', () => this.fetchData( 'excel' ) );
const layout = new OO.ui.HorizontalLayout( {
items: [
button1,
button2
]
} );
$( '#mw-content-text' ).append( layout.$element );
const skin = mw.config.get( 'skin' );
let portletId = 'p-tb';
if ( skin === 'monobook' ) {
portletId = 'p-cactions';
} else if ( skin === 'vector' || skin === 'vector-2022' ) {
portletId = 'p-electronpdfservice-sidebar-portlet-heading';
}
$( mw.util.addPortletLink( portletId, '', mw.msg( 'gadget-tabular-export-csv', 't-tiec' ) ) ).on( 'click', ( e ) => {
this.fetchData( 'csv' );
e.preventDefault();
} );
$( mw.util.addPortletLink( portletId, '', mw.msg( 'gadget-tabular-export-excel', 't-tiee' ) ) ).on( 'click', ( e ) => {
this.fetchData( 'excel' );
e.preventDefault();
} );
}
}
fetchData( type ) {
const apiRequest = this.api.get( {
action: 'query',
formatversion: 2,
format: 'json',
prop: 'revisions',
rvprop: 'ids|user|content|timestamp',
rvslots: 'main',
titles: mw.config.get( 'wgPageName' )
} );
$.when( apiRequest, papaScript, xlsxScript ).then( ( apiResponse, papaResult, xlsxResult ) => {
let jsondata;
const query = apiResponse[ 0 ] && apiResponse[ 0 ].query;
const page = query && query.pages && query.pages[ 0 ];
const rev = page && page.revisions && page.revisions[ 0 ];
if ( rev && rev.slots && rev.slots.main && rev.slots.main.content ) {
try {
jsondata = JSON.parse( rev.slots.main.content );
} catch ( e ) {
OO.ui.alert( 'Could not parse tabular data: ' + e );
return;
}
}
if ( !jsondata ) {
OO.ui.alert( 'Could not find tabular data' );
return;
}
if ( type === 'csv' ) {
this.exportToCSV( jsondata, page );
} else if ( type === 'excel' ) {
this.exportToExcel( jsondata, page );
}
} );
}
exportToExcel( jsondata, pageData ) {
const workbook = this.convertToExcel( jsondata, pageData );
const wopts = { bookType: 'xlsx', bookSST: false, type: 'array', charset:'utf-8', cellDates:true, cellStyles:true };
XLSX.writeFile( workbook, `${ mw.config.get( 'wgTitle' ) }.xlsx`, wopts );
}
exportToCSV( jsondata, pageData ) {
const csvdata = { fields: [], data: [] };
jsondata.schema.fields.forEach( ( element ) => {
csvdata.fields.push( element.name );
} );
jsondata.data.forEach( ( row ) => {
const rowArray = [];
row.forEach( ( xElement, xIndex ) => {
if ( jsondata.schema.fields[ xIndex ].type === 'localized' && typeof xElement === 'object' ) {
rowArray.push( Object.values( xElement )[ 0 ] );
} else {
rowArray.push( xElement );
}
} );
csvdata.data.push( rowArray );
} );
const csv = Papa.unparse( csvdata );
const blob = new Blob( [ csv ], { type: 'text/csv' } );
saveAs( blob, `${ mw.config.get( 'wgTitle' ) }.csv` );
}
convertToExcel( jsondata, pageData ) {
const wb = new Workbook();
const dataArray = [ [] ];
const localizedIndices = [];
const languageCodes = new Set();
jsondata.schema.fields.forEach( ( field, idx ) => {
// Push headers onto the output array
dataArray[ 0 ].push( field.name );
// Collect known language codes
if ( field.title && typeof field.title === 'object' ) {
addKeysToSet( languageCodes, field.title );
}
// Find indices of columns with type 'localized'
if ( field && field.type === 'localized' ) {
localizedIndices.push( idx );
}
} );
// Modify the array to get one of the localized values
jsondata.data.forEach( ( row ) => {
const copyOfRow = deepCopyArray( row );
localizedIndices.forEach( ( idx ) => {
if ( isNonNullObject( copyOfRow[ idx ] ) ) {
addKeysToSet( languageCodes, copyOfRow[ idx ] );
copyOfRow[ idx ] = Object.values( copyOfRow[ idx ] )[ 0 ];
}
} );
dataArray.push( copyOfRow );
} );
const ws = XLSX.utils.aoa_to_sheet( dataArray );
const ws_name = mw.config.get( 'wgTitle' )
.replace( /[/\\*'?[\]:]/g, ' ' )
.toLowerCase()
.slice( 0, 31 );
ws[ '!merges' ] = [];
wb.SheetNames.push( ws_name );
wb.Sheets[ ws_name ] = ws;
const copyDataWithReferences = (dataArray, dataSheetName) => {
return dataArray.map((row, i) => {
return row.map( (cell, j) => {
const cellName = XLSX.utils.encode_cell({r:i, c:j});
return {f: `'${dataSheetName}'!${cellName}` };
});
});
};
// dataArray is overwritten here repeatedly, but it overrides the same cells
// we leave hidden comments to mark cells as type localized, for later.
function createTranslatedArrayOfArrays( dataArray, lang ) {
jsondata.schema.fields.forEach( ( field, idx ) => {
// Push headers onto the output array
const firstEntry = Object.entries(field.title)[0];
const cellValue = field.title[ lang ] || firstEntry[ 1 ];
const cellLanguage = field.title[ lang ] ? lang : firstEntry[ 0 ] || '';
const cell = {v: cellValue, c: [ { a: 'localized', t: cellLanguage, hidden: true} ] };
dataArray[ 0 ][ idx ] = cell;
} );
jsondata.data.forEach( ( row, rowIndex ) => {
localizedIndices.forEach( ( idx ) => {
if ( isNonNullObject( row[ idx ] ) ) {
const firstEntry = Object.entries(obj)[0];
const cellValue = row[ idx ][ lang ] || firstEntry[ 1 ] || null;
const cellLanguage = row[ idx ][ lang ] ? lang : firstEntry[ 0 ] || '';
const cell = {v: cellValue, c: [ { a: 'localized', t: cellLanguage, hidden: true } ] };
cell.c.hidden = true;
dataArray[rowIndex + 1][ idx ] = cell;
}
} );
} );
return dataArray;
}
// A copy that refers back to the original sheet name for cell values
const dataArrayWithReferences = copyDataWithReferences( dataArray, ws_name );
// Add copies of the data for each languageCode
[ ...languageCodes ].sort().forEach( ( lang ) => {
const ws_name = lang;
const ws = XLSX.utils.aoa_to_sheet( createTranslatedArrayOfArrays( dataArrayWithReferences, lang ) );
wb.SheetNames.push( ws_name );
wb.Sheets[ ws_name ] = ws;
} );
// Generate links
const pageUrl = new URL( mw.config.get( 'wgArticlePath' ).replace( '$1', pageData.title ), window.location );
const historyUrl = new URL( mw.config.get( 'wgScript' ), window.location );
historyUrl.searchParams.set( 'title', pageData.title );
historyUrl.searchParams.set( 'action', 'history' );
// Save metadata to excel properties
wb.Props = {
Title: pageData.title,
Company: mw.config.get( 'wgSiteName' ),
SheetNames: wb.SheetNames,
Worksheets: wb.SheetNames.length
};
wb.Custprops = {
Revision: pageData.revisions[ 0 ].revid,
LastModified: pageData.revisions[ 0 ].timestamp,
Url: pageUrl.toString(),
Contributors: historyUrl.toString(),
Sources: jsondata.sources || '',
LicenseCode: jsondata.license,
Software: 'Tabular Import/Export-gadget'
};
for ( const language in jsondata.description ) {
wb.Custprops[ `Description.${ language }` ] = jsondata.description[ language ];
}
if ( jsondata.mediaWikiCategories && jsondata.mediaWikiCategories.length > 0 ) {
wb.Custprops.MediaWikiCategories = jsondata.mediaWikiCategories.map( ( cat ) => {
let str = 'Category:' + cat.name;
if ( cat.sort ) {
str += '|' + cat.sort;
}
return str;
} ).join( ', ' );
}
return wb;
}
selectFile() {
const selectedFile = this.selectFileWidget.getValue();
const name = selectedFile.name;
$.when( papaScript, xlsxScript ).then( ( papaResult, xlsxResult ) => {
if ( name.endsWith( '.csv' ) || name.endsWith( '.tsv' ) || name.endsWith('.txt') ) {
this.importCSVFile( selectedFile );
} else if ( name.endsWith( '.xlsx' ) || name.endsWith( '.xlsb' ) || name.endsWith( '.xls' ) || name.endsWith( '.ods' ) ) {
selectedFile.arrayBuffer().then( ( buffer ) => {
const workbook = XLSX.read( buffer, { type: 'array', dense:true, charset: 'utf-8' } );
this.importXLSX( workbook );
} );
} else {
OO.ui.alert( mw.msg( 'gadget-tabular-unsupported-file' ) );
}
} );
}
importCSVFile( file ) {
Papa.parse( file, {
header: true,
dynamicTyping: true,
complete: this.importCSV.bind( this ),
error: this.importCSVFailed.bind( this ),
encoding: 'utf-8'
} );
}
importCSV( csvdata ) {
const jsondata = {
schema: { fields: [] },
data: []
};
csvdata.meta.fields.forEach( ( columnName ) => {
jsondata.schema.fields.push( {
name: columnName.replace( /\W/g, '' ),
type: 'string',
title: {
en: columnName
}
} );
} );
csvdata.data.forEach( ( row ) => {
const columnData = [];
csvdata.meta.fields.forEach( ( columnName ) => {
columnData.push( row[ columnName ] === '' ? null : row[ columnName ] );
} );
jsondata.data.push( columnData );
} );
this.writeTextbox( jsondata );
}
importCSVFailed( error ) {
OO.ui.alert( mw.msg( 'gadget-tabular-import-failed', error ) );
}
importXLSX( workbook ) {
const jsondata = {
schema: { fields: [] },
data: []
};
if ( workbook.Custprops ) {
// If the workbook has custom properties, we can use them to fill in some metadata
if ( workbook.Custprops.LicenseCode ) {
jsondata.license = workbook.Custprops.LicenseCode;
}
if ( workbook.Custprops.Sources ) {
jsondata.sources = workbook.Custprops.Sources;
}
// Extract language descriptions from keys starting with 'Description.'
Object.entries( workbook.Custprops ).forEach( ( [ key, value ] ) => {
if ( key.startsWith( 'Description.' ) ) {
const lang = key.slice( 'Description.'.length );
if ( !jsondata.description ) {
jsondata.description = {};
}
jsondata.description[ lang ] = value;
}
} );
if ( workbook.Custprops.MediaWikiCategories ) {
jsondata.mediaWikiCategories = workbook.Custprops.MediaWikiCategories.split( ', ' ).map( ( catStr ) => {
// Remove "Category:" prefix and split by '|'
const [ name, sort ] = catStr.replace( /^Category:/, '' ).split( '|' );
const category = { name };
if ( sort ) {
category.sort = sort;
}
return category;
} );
}
}
/* We only look at the first sheet */
const sheet = workbook.Sheets[ workbook.SheetNames[ 0 ] ];
const jsonsheet = XLSX.utils.sheet_to_json( sheet, { raw: true, defval: null } );
if ( jsonsheet.length < 2 ) {
return;
}
for ( const header in jsonsheet[ 0 ] ) {
jsondata.schema.fields.push( {
name: header.replace( /\W/g, '' ),
type: 'string',
title: {
en: header
}
} );
}
jsonsheet.forEach( ( row ) => {
const rowData = [];
for ( const header in jsonsheet[ 0 ] ) {
let cell = row[ header ];
if ( cell === undefined || cell === '' ) {
cell = null;
}
rowData.push( cell );
}
jsondata.data.push( rowData );
} );
if (workbook.SheetNames.length > 1) {
workbook.SheetNames.forEach( (name, index) => {
if ( workbook.Sheets[name]['!type'] !== undefined ) return;
workbook.Sheets[name].forEach( (row, i) => {
row.forEach( (cell, j) => {
if( cell.c && cell.c[0] && cell.c[0].a === 'localized' && cell.c[0].t === name ) {
if ( i === 0 ) {
if (jsondata.schema.fields[j].title ) {
jsondata.schema.fields[j].title[name] = cell.v;
}
return;
}
if ( typeof jsondata.data[i+1][j] === 'object' ) {
jsondata.data[i+1][j][cell.c[0].t] = cell.v;
} else {
jsondata.data[i+1][j] = { name: cell.v };
}
}
} );
} );
});
}
this.writeTextbox( jsondata );
}
writeTextbox( jsondata ) {
const selectFileWidget = this.selectFileWidget;
const selectedFile = selectFileWidget && selectFileWidget.getValue ? selectFileWidget.getValue() : { name: '' };
// Guess the data type, based on the first row of data
const firstRow = jsondata.data[ 0 ] || [];
firstRow.forEach( ( columnEl, index ) => {
if ( typeof columnEl === 'number' ) {
jsondata.schema.fields[ index ].type = 'number';
} else if ( typeof columnEl === 'boolean' ) {
jsondata.schema.fields[ index ].type = 'boolean';
}
} );
// Write some metadata
const username = mw.config.get( 'wgUserName' );
const formattedUsername = username ?
`by ${ mw.config.get( 'wgFormattedNamespaces' )[ mw.config.get( 'wgNamespaceIds' ).user ] }:${ username }` :
'';
const metadata = {
license: 'CC0-1.0',
description: {
en: mw.msg( 'gadget-tabular-description-placeholder' )
},
sources: mw.msg( 'gadget-tabular-imported-from', selectedFile.name, formattedUsername )
};
/* Merge it */
const merged = $.extend( metadata, jsondata );
$( '#wpTextbox1' ).textSelection( 'setContents', JSON.stringify( merged, null, '\t' ) );
$( '#wpSummary' ).val( mw.msg( 'gadget-tabular-importing-summary', selectedFile.name ) );
}
}
if ( mw.config.get( 'wgNamespaceNumber' ) === 486 && mw.config.get( 'wgTitle' ).endsWith( '.tab' ) ) {
mw.messages.set( i18n );
$.when(
mw.loader.using( [
'oojs-ui', 'oojs-ui-core', 'oojs-ui-widgets', 'mediawiki.api', 'jquery.textSelection'
] ),
$.ready
).then( () => {
const tabularImportExport = new TabularImportExport();
tabularImportExport.install();
} );
}
}( jQuery, mediaWiki ) );