# Commons:Digital Public Library of America/Analytics/Code

import json, requests, os, pywikibot, datetime, argparse

# Command-line interface: an optional --cat CATEGORY restricts the run to a
# single category title (stored on arg.cat; None when the flag is omitted).
parse = argparse.ArgumentParser()
parse.add_argument('--cat', metavar='CATEGORY')
arg = parse.parse_args()

# Default Pywikibot site from the user's configuration; log in up front so
# the page edits made further down are authenticated.
site = pywikibot.Site()
site.login()

# Run counters, reported in the summary printed at the end of the script:
#   updated   - data pages saved with fresh data
#   none      - categories on the allow list whose API response had no items
#   exists    - data pages that already contained last month's data
#   fulfilled - request categories removed because the category is now listed
#   already   - categories that were already tagged as requested
#   requested - categories newly tagged as requested
updated = none = exists = fulfilled = already = requested = 0

# Skeleton of the tabular-data (.tab) page; 'sources', 'description' and
# 'data' are filled in per category before the page is serialised.
datapage = {
    'license': 'CC0-1.0',
    'schema': {
        'fields': [
            {'name': 'timestamp', 'type': 'string', 'title': {'en': 'Month'}},
            {'name': 'pageviews', 'type': 'number', 'title': {'en': 'Pageviews'}},
        ],
    },
}

# Skeleton of the chart (.chart) page; 'source' is set per category.
chartpage = {
    'license': 'CC0-1.0',
    'version': 1,
    'type': 'area',
    'xAxis': {'title': {'en': 'Month'}, 'type': 'date'},
    'yAxis': {'title': {'en': 'Views'}},
}

# Titles ('Category:...') of every category on the Commons Impact Metrics
# allow list, in file order.
cimlist = []
print('\n****\n')

# Refresh the local copy of the allow list. This is done with requests
# instead of shelling out to curl so that a failed download is noticed:
# curl's exit status was ignored and, without -f, an HTTP error page would
# have been saved and then parsed as if it were a list of categories.
allow_list_url = 'https://gitlab.wikimedia.org/repos/data-engineering/airflow-dags/-/raw/main/main/dags/commons/commons_category_allow_list.tsv'
allow_list_file = 'commons_category_allow_list.tsv'
try:
    response = requests.get(allow_list_url, timeout=60)
    response.raise_for_status()
except requests.RequestException as err:
    # Best effort: report the problem and fall back to whatever copy is
    # already on disk (open() below still fails loudly if there is none).
    print('Could not download ' + allow_list_file + ': ' + str(err))
else:
    with open(allow_list_file, 'wb') as out:
        out.write(response.content)

# Explicit UTF-8 so non-ASCII category names do not depend on the platform's
# default encoding.
with open(allow_list_file, 'r', encoding='utf-8') as cim:
    for line in cim:
        entry = line.strip()
        if not entry:
            # A blank line is not a category; skipping it avoids a bogus
            # 'Category:' entry and an inflated total in the summary.
            continue
        # The list uses underscores and percent-escapes for '&' and "'";
        # convert to the title form Pywikibot returns.
        cimlist.append('Category:' + entry.replace('_',' ').replace('%26', '&').replace('%27','\''))

# Categories tagged as "requested" that have since made it onto the allow
# list no longer need the tag: remove the request category from them.
cimcat = pywikibot.Category(site, 'Category requested for Commons Impact Metrics')
cimreqs = cimcat.subcategories()
allowed_titles = set(cimlist)  # built once for O(1) membership tests
for cimreq in cimreqs:
    cimreq = cimreq.title()
    if cimreq in allowed_titles:
        category = pywikibot.Page(site, cimreq)
        print(cimreq)
        category.change_category(cimcat, None, 'Remove category: [[Category:Category requested for Commons Impact Metrics]]')
        print('Removed request for ' + cimreq)
        fulfilled += 1

def get_data(cat):
    """Fetch the monthly pageview series for one category.

    Queries the commons-analytics ``pageviews-per-category-monthly`` REST
    endpoint (deep scope, all wikis, full date range) for ``cat``.

    Args:
        cat: Category name as it should appear in the URL path. It is
            inserted verbatim, so the caller is responsible for any
            escaping and for dropping the ``Category:`` prefix.

    Returns:
        A list of ``[timestamp_prefix, pageview_count]`` pairs, one per
        entry in the response's ``items`` array, where ``timestamp_prefix``
        is the first seven characters of the item's ``timestamp``
        (presumably ``YYYY-MM``; confirm against the API).

    Raises:
        KeyError: If the decoded response has no ``items`` key; callers
            use this to detect categories the API has no data for.
    """
    response = requests.get('https://wikimedia.org/api/rest_v1/metrics/commons-analytics/pageviews-per-category-monthly/' + cat + '/deep/all-wikis/00000101/99991231')
    payload = json.loads(response.text)

    return [
        [entry['timestamp'][0:7], entry['pageview-count']]
        for entry in payload['items']
    ]

# Decide which categories to process: the single one given with --cat, or
# every subcategory of the tracking category for the page-views table.
if arg.cat:
    cats = [arg.cat]
else:
    print('\nFinding categories to work on... (' + str(datetime.datetime.now()) + ')')
    update_cat = pywikibot.Category(site, 'Category with page views table')

    # This maintenance category sits alongside the real ones and must not be
    # processed. Filtering (rather than list.remove) means the run does not
    # die with ValueError if it is ever absent from the listing.
    maintenance_cat = 'Category:Category page views need to be updated'
    cats = []
    for u in update_cat.subcategories():
        title = u.title()
        if title != maintenance_cat:
            cats.append(title)
    print('{{views from category}} categories retrieved! (' + str(datetime.datetime.now()) + ')')

# For every category: make sure its chart page exists, then (unless the data
# page already holds last month's figures) fetch fresh data and save it.
for cat in cats:
    print('\n' + cat)

    # Per-category metadata for the tabular data page.
    datapage['sources'] = 'Copied from [https://wikimedia.org/api/rest_v1/metrics/commons-analytics/pageviews-per-category-monthly/' + cat.replace(' ','_').replace('Category:','') + '/deep/all-wikis/00000101/99991231 Commons Impact Metrics].'
    datapage['description'] = {'en': 'Data from commons-analytics/pageviews-per-category-monthly endpoint for ' + cat.replace(' ','_')}

    # 'Category:Foo' maps to 'Data:Views/Foo.tab' and 'Data:Views/Foo.chart'.
    pagename = pywikibot.Page(site, cat.replace('Category:','Data:Views/') + '.tab')
    chartpagename = pywikibot.Page(site, cat.replace('Category:','Data:Views/') + '.chart')

    # The chart definition is created once and never rewritten afterwards.
    if not chartpagename.exists():
        chartpage['source'] = cat.replace('Category:','Views/') + '.tab'
        chartpagename.text = json.dumps(chartpage, indent=4)
        chartpagename.save('   Creating chart page for category page views from Commons Impact Metrics.')
    current = False

    if pagename.exists():
        # Last day of the previous month, formatted as YYYY-MM. If that
        # string already occurs in the page text, the page is treated as
        # up to date.
        now = (datetime.datetime.now().replace(day=1) - datetime.timedelta(days=1)).strftime("%Y-%m")
        if now in pagename.text:
            current = True
            print('   Data already up-to-date!')
            exists += 1

    if not current:
        category = pywikibot.Page(site, cat)
        try:
            datapage['data'] = get_data(cat.replace(' ','_').replace('&', '%26').replace('Category:',''))
        except KeyError:
            # get_data raises KeyError when the response has no 'items'.
            if cat not in cimlist:
                # Not on the allow list: tag the category as requested,
                # unless it already carries the request category.
                print('   Not found in Commons Impact Metrics.')
                if cimcat not in category.categories():
                    category.text += '\n[[Category:Category requested for Commons Impact Metrics]]'
                    category.save(summary='Adding category: [[Category:Category requested for Commons Impact Metrics]]')
                    print('   Category requested for Commons Impact Metrics!')
                    requested += 1
                else:
                    print('   Category already requested for Commons Impact Metrics.')
                    already += 1
            else:
                # On the allow list but no items returned yet.
                print('   Data still generating in Commons Impact Metrics.')
                none += 1
            continue
        pagename.text = json.dumps(datapage, indent=4)
        pagename.save('Adding tabular data for category page views from Commons Impact Metrics.')
        updated += 1

        # Touching the category page is best effort: a failure is reported
        # but must not stop the run. Catching Exception (not a bare except)
        # lets Ctrl-C and SystemExit still interrupt the script.
        try:
            category.touch()
        except Exception as err:
            print('   Could not touch ' + cat + ': ' + str(err))

# End-of-run report: one "| -- label: value" row per statistic, framed by
# '****' rules with a blank line before and after.
summary_rows = [
    ('Total categories currently in Commons Impact Metrics', len(cimlist)),
    ('Categories with updated data', updated),
    ('Categories with no data detected', none),
    ('Categories already up to date', exists),
    ('Category requests fulfilled', fulfilled),
    ('Categories already requested to be added', already),
    ('Categories newly requested to be added', requested),
    ('Total categories currently utilizing this tool', len(cats)),
]
report_lines = ['', '****']
for label, value in summary_rows:
    report_lines.append('| -- ' + label + ': ' + str(value))
report_lines += ['****', '']
print('\n'.join(report_lines))