#!/usr/bin/python
# -*- coding: UTF-8 -*-
# User:Inkwina/catlistcount.py

import re
import shelve
import time
import urllib

import mwclient
import simplejson


# --- Report configuration ---
# Size of the "top N" list.
# NOTE(review): Howmany is not referenced anywhere else in the visible script.
Howmany = 200
# Category whose members are listed and checked for usage.
Whichcategory = 'Category:Images that should use vector graphics'
# Title of the wiki page the generated report is saved to.
Wheretosave = u'Top 200 Images that should use vector graphics by usage'

# Path of the shelve file used to cache progress and counts between runs.
shelffile = "./catlistcount.cache"

# API endpoint and the query used to list the members of Whichcategory,
# 50 titles per request.
apiurl = "http://commons.wikimedia.org/w/api.php"
apiparams = {
    'format': "json",
    'action': "query",
    'list': "categorymembers",
    'cmlimit': "50",
    'cmprop': 'title',
    'cmtitle': Whichcategory,
}

# CheckUsage endpoint and its request parameters.
checkusageurl = "http://toolserver.org/~daniel/WikiSense/CheckUsage.php"
checkusageparams = {
    # filename; starts empty and is filled with one file name per line
    # before each request
    'i': '',
    'w': '_wp_20',  # which wikis to check (top 20 wikipedias not to kill server)
    'x': 'main',    # what kind of pages
    'r': 'on',      # RAW
    'b': '1',       # not Bulk, we check 1 by 1
}

# Matches a "[name] digits" line: group(1) is the text between the square
# brackets, group(2) the (possibly empty) run of digits that follows.
# Raw string so the regex escapes are not treated as string escapes.
wikire = re.compile(r'\s*\[([^\]]*)\]\s*(\d*)')

# Open (or create) the persistent cache. writeback=True keeps accessed entries
# in memory so nested dicts can be mutated in place and written on sync().
datastore = shelve.open(shelffile, writeback=True)
if "items" not in datastore:  # newfile
    datastore["items"] = {}
    datastore["all-done"] = False
elif "query-continue" in datastore:
    # pick up where we left last time
    # (guarded: a cache can hold "items" without a saved continuation value,
    # in which case the listing simply restarts from the beginning)
    apiparams["cmcontinue"] = datastore["query-continue"]

# Walk the category batch by batch until the API reports no continuation.
# For every batch:
#   1. fetch the member titles and cache an entry per title,
#   2. remember the continuation value (or mark the listing as finished),
#   3. ask CheckUsage where the batch's files (namespace 6) are used and
#      accumulate per-wiki and total counts on each cached entry.
# Per-wiki grand totals are not maintained here; they are rebuilt from the
# per-file counts once the loop has finished.
#
# NOTE(review): the continuation value / "all-done" flag is stored before the
# batch's usage has been counted, so an interruption during step 3 would
# presumably leave that batch uncounted on the next run - confirm.
while not datastore["all-done"]:
    # Start every batch with an empty file list for CheckUsage.
    checkusageparams['i'] = ''
    query = urllib.urlopen(apiurl, urllib.urlencode(apiparams))
    data = simplejson.load(query)
    members = data["query"]["categorymembers"]
    for item in members:
        # Key entries by the title without its namespace prefix, UTF-8
        # encoded and with underscores instead of spaces.
        Fname = item["title"].split(':')[-1]
        Fname = Fname.encode('UTF-8').replace(' ', '_')
        datastore["items"][Fname] = {"ns": item["ns"]}
        if item["ns"] == 6:  # pick out Image:
            checkusageparams['i'] += Fname + "\n"
            datastore["items"][Fname]["countof"] = {}
            datastore["items"][Fname]["counttotal"] = 0
            datastore["items"][Fname]["checked"] = False
            print("Added: " + Fname)
    datastore.sync()
    print("--- Cached Data ---")
    if members:  # an empty batch has no first/last title to report
        print("From: " + members[0]["title"])
        print("To: " + members[-1]["title"])

    if "query-continue" in data:
        apiparams["cmcontinue"] = data["query-continue"]["categorymembers"]["cmcontinue"].encode("UTF-8")
        datastore["query-continue"] = apiparams["cmcontinue"]
        datastore.sync()
    else:
        datastore["all-done"] = True

    query = urllib.urlopen(checkusageurl, urllib.urlencode(checkusageparams))
    # No wiki header seen yet for this response.
    whichwiki = None
    for line in query.readlines():
        sulfarini = wikire.match(line)
        if sulfarini is not None:
            # "[wiki] number" header: following usage lines belong to it.
            print(sulfarini.group(1) + " : " + sulfarini.group(2))
            whichwiki = sulfarini.group(1)
            continue
        try:
            page, usedfile = line.split()
        except ValueError:
            # Not a two-field usage line (blank line, etc.): ignore it.
            continue
        entry = datastore["items"].get(usedfile)
        if whichwiki is None or entry is None or "countof" not in entry:
            # Usage line before any wiki header, or for a file that was not
            # cached as a namespace-6 item: nothing to attribute it to.
            continue
        entry["countof"][whichwiki] = entry["countof"].get(whichwiki, 0) + 1
        entry["counttotal"] += 1
        print(usedfile + "," + whichwiki + "," + str(entry["countof"][whichwiki]) + "," + str(entry["counttotal"]))
    datastore.sync()
    # Pause between batches.
    time.sleep(2)

# Rebuild the per-wiki totals from the per-file counts and add up the total
# usage over all cached files (namespace 6 entries only).
wikitotals = {}
ftotal = 0
for item in datastore["items"].itervalues():
    if item['ns'] == 6:
        ftotal += int(item["counttotal"])
        for w, c in item['countof'].iteritems():
            wikitotals[w] = wikitotals.get(w, 0) + c
datastore["wikis"] = wikitotals

# Fixed introduction of the generated report page.
# NOTE(review): this text reads like rendered wikitext (plain "here",
# "(talk · contribs)", a bare category line); the original markup was
# presumably lost when the script was copied - confirm against the real source.
output = """ This Page is an Automatically generated list of the 200 most used Images that should use vector graphics

The code for making this list is available here The images are only checkd for use in Articles (not talk pages etc.) on the 20 largest wikipedias

--Inkwina (talk · contribs)


Category:Images that should use vector graphics#*%20%20Top%20200%20by%20Usage

"""

# Timestamp of this run, formatted in local time.
output += "\nLast Update %s\n" % time.strftime("%a, %d %b %Y %H:%M:%S %Z")

# Summary figures: number of cached items, and total usage computed two ways
# (summed over the per-wiki totals and summed over the per-file totals).
wtotal = sum(int(x) for x in datastore["wikis"].itervalues())
output += "\n*Items in Total: " + str(len(datastore["items"]))
output += "\n**Total use(from wikis) : " + str(wtotal)
output += "\n**Total use(from files) : " + str(ftotal)
output += "\n----\n"

# Numbered list of wikis, highest usage count first. Sorting (count, wiki)
# tuples in reverse orders by count, then by wiki name.
wikisort = sorted(((v, k) for k, v in datastore["wikis"].items()), reverse=True)
for count, wiki in wikisort:
    output += "\n# " + str(wiki) + ": " + str(count)

def mycmp(x, y):
    """Compare two cached item names by total usage, highest first.

    x and y are keys of datastore["items"]. Returns a negative number when
    x has the larger "counttotal", a positive number when y has, and 0 when
    they are equal, so sorting with this function yields descending usage.
    """
    # Arguments are swapped to invert the natural (ascending) order.
    return cmp(datastore["items"][y]["counttotal"], datastore["items"][x]["counttotal"])

# Names of all cached namespace-6 items, ordered by total usage (most used
# first) using mycmp.
sortall = [x for x in datastore["items"] if datastore["items"][x]["ns"] == 6]
sortall.sort(mycmp)

# NOTE(review): nothing in the visible script adds the entries of sortall
# (or uses Howmany) to the report; the code that listed the top images was
# presumably lost when the script was copied - confirm against the real source.
output += "\n\n"

# Log in and overwrite the report page with the generated text.
# NOTE(review): "username" / "passwd" look like placeholders for the bot
# account's credentials - confirm.
site = mwclient.Site('commons.wikimedia.org')
site.login("username", "passwd")
page = site.Pages[Wheretosave]
page.save(output, summary=u'Inkwina Bot Update')

# Category:Images that should use vector graphics Category:User pages with broken file links