
import urlparse, httplib, os.path, re, pg, string

def url_here(url):
   """Tell whether the document at `url` exists, using an HTTP HEAD request.

   Returns 1 when the server answers 200 and 0 when it answers 404.

   Raises TypeError when the URL scheme is not 'http', and RuntimeError
   for any status code other than 200 or 404.
   """
   proto, server, path, query, _fragment = urlparse.urlsplit(url)
   if proto != 'http':
      raise TypeError("Can't handle protocol %s" % proto)

   # An empty path would give a malformed request line; ask for the root.
   target = path or '/'
   if query:
      # The query string is part of what identifies the document.
      target = '%s?%s' % (target, query)

   conn = httplib.HTTPConnection(server)
   try:
      # HEAD is enough: only the status line matters, not the body.
      conn.request('HEAD', target)
      status = conn.getresponse().status
   finally:
      # Release the socket even when the request or the response fails.
      conn.close()

   if status == 200:
      # document is present
      return 1
   if status == 404:
      # document not present
      return 0
   raise RuntimeError('Unknown answer: %s' % status)
def s2eye(fname):
   """Figure out the URL of the mirrored copy of a local upload.

   `fname` is made absolute and must have the form
   /www/uploads/<imc>/metafiles/<file>, with no further sub-directory
   below metafiles. Returns the matching http://clients.loudeye.com/ URL
   as a string, or None when the path has a different shape, looks like a
   thumb, or does not name an existing regular file.
   """
   abspath = os.path.abspath(fname)
   mo = re.match(r'^/www/uploads/(?P<imc>.*)/metafiles/(?P<apath>[^/]*)$', abspath)
   if not mo:
      return None
   # thumbs are not mirrored
   if re.match(r'.*thumb', abspath):
      return None
   # The pattern is anchored at both ends, so abspath is exactly
   # /www/uploads/<imc>/metafiles/<apath>; no need to rebuild it.
   if not os.path.isfile(abspath):
      return None
   data = {'imc': mo.group('imc'), 'afpath': mo.group('apath')}
   return 'http://clients.loudeye.com/imc/%(imc)s/%(afpath)s' % data

def get_next(db, imc, num, offset, maxage):
   """Get the next `num` files that need to be tested for mirroring.

   Queries the webcast table through `db` (an object whose query(sql)
   result offers getresult()) for rows flagged mirrored, with a non-empty
   linked_file, and with created < now() - maxage. `num` and `offset`
   select the page of rows; `num`, `offset` and `maxage` are formatted
   with %d, so they must be numbers.

   Returns a list of (id, local_path) tuples, where local_path is
   linked_file with everything up to and including '/uploads/' replaced
   by '/www/uploads/<imc>/' and surrounding whitespace stripped.
   """
   # Paging with limit/offset is only stable when the rows have a fixed
   # order; without "order by" successive pages may skip or repeat rows.
   q = """select id, linked_file from webcast where created < now() - %d
           and linked_file != '' and mirrored = 't'
           order by id
           limit %d offset %d;""" % (maxage, num, offset)
   local_prefix = '/www/uploads/%s/' % imc
   result = []
   for row_id, linked_file in db.query(q).getresult():
      # figure out the local path of the document
      local_path = re.sub('.*/uploads/', local_prefix, linked_file)
      result.append((row_id, local_path.strip()))
   return result

if __name__ == '__main__':
   # Usage: <script> <imc> <maxage>
   # Walks the webcast rows of database active_<imc> in batches and prints
   # "<id>, <local path>" for every file found on the mirror.
   import sys
   if len(sys.argv) < 3:
      sys.exit('usage: %s <imc> <maxage>' % sys.argv[0])
   imc = sys.argv[1]
   maxage = int(sys.argv[2])
   batch = 100
   db = pg.DB(user='php', dbname='active_%s' % imc)
   try:
      offset = 0
      records = get_next(db, imc, batch, offset, maxage)
      while records:
         for article_id, fname in records:
            http_url = s2eye(fname)
            # Files without a mirror URL, or absent from the mirror,
            # are skipped silently.
            if http_url and url_here(http_url):
               # document is mirrored, print its path
               print('%d, %s' % (article_id, fname))
         offset += batch
         records = get_next(db, imc, batch, offset, maxage)
   finally:
      # Close the database connection even if a lookup raised.
      db.close()
