"""List webcast files that are already present on the HTTP mirror.

Usage: <script> IMC MAXAGE

For every row of the ``webcast`` table of database ``active_<IMC>`` returned
by :func:`get_next`, the local path of the linked file is computed, mapped to
its mirror URL and probed with an HTTP HEAD request.  Each file that the
mirror serves is printed as ``<article id>, <local path>``.
"""

import os.path
import re
import string
import sys

try:  # Python 2 module names, as used originally
    import httplib
    import urlparse
except ImportError:  # same modules under their Python 3 names
    import http.client as httplib
    import urllib.parse as urlparse

try:
    import pg
except ImportError:
    # The helpers below do not need the database driver; only main() does.
    pg = None


# Number of rows fetched from the database per query.
PAGE_SIZE = 100

# Local path of a mirrored upload: /www/uploads/<imc>/metafiles/<file name>.
_UPLOAD_RE = re.compile(
    r'^/www/uploads/(?P<imc>.*)/metafiles/(?P<apath>[^/]*)$')

# Any path containing "thumb" on its first line is treated as a thumbnail.
_THUMB_RE = re.compile('.*thumb')


def url_here(url):
    """Tell whether the document at *url* exists, using an HTTP HEAD request.

    Returns True when the server answers 200 and False when it answers 404.

    Raises TypeError when *url* is not an ``http`` URL and RuntimeError when
    the server answers with any other status code.
    """
    proto, server, path, query, _fragment = urlparse.urlsplit(url)
    if proto != 'http':
        raise TypeError("Can't handle protocol %s" % proto)

    # An empty path is not a valid request target, and the query string is
    # part of what identifies the document.
    target = path or '/'
    if query:
        target = '%s?%s' % (target, query)

    conn = httplib.HTTPConnection(server)
    try:
        conn.request('HEAD', target)
        status = conn.getresponse().status
    finally:
        # Close the connection even when the request fails.
        conn.close()

    if status == 200:
        return True
    if status == 404:
        return False
    raise RuntimeError('Unknown answer: %s' % status)


def s2eye(fname):
    """Figure out the URL of the mirrored copy of the local file *fname*.

    Returns the mirror URL when *fname* resolves to an existing regular file
    named ``/www/uploads/<imc>/metafiles/<name>`` that is not a thumbnail,
    and None otherwise.
    """
    abspath = os.path.abspath(fname)
    mo = _UPLOAD_RE.match(abspath)
    # Thumbs are not mirrored.
    if mo is None or _THUMB_RE.match(abspath):
        return None
    # The pattern is anchored at both ends, so abspath is exactly
    # /www/uploads/<imc>/metafiles/<apath>.
    if not os.path.isfile(abspath):
        return None
    return 'http://clients.loudeye.com/imc/%(imc)s/%(afpath)s' % {
        'imc': mo.group('imc'),
        'afpath': mo.group('apath'),
    }


def get_next(db, imc, num, offset, maxage):
    """Get the next *num* files that need to be tested for mirroring.

    *db* must provide ``query(sql).getresult()`` returning rows of
    ``(id, linked_file)``.  *offset* is the number of rows to skip and
    *maxage* is subtracted from ``now()`` in the query (its unit is whatever
    the database applies to that expression -- TODO confirm).

    Returns a list of ``(id, local_path)`` tuples, where ``local_path`` is
    ``linked_file`` with everything up to ``/uploads/`` replaced by
    ``/www/uploads/<imc>/`` and surrounding whitespace removed.
    """
    # ORDER BY keeps LIMIT/OFFSET pages stable: without it the database may
    # return rows in a different order on each call, so rows could be skipped
    # or reported twice while paging.
    q = """select id, linked_file
           from webcast
           where created < now() - %d
             and linked_file != ''
             and mirrored = 't'
           order by id
           limit %d offset %d;""" % (maxage, num, offset)
    local_prefix = '/www/uploads/%s/' % imc
    result = []
    for row in db.query(q).getresult():
        # Figure out the local path of the document.
        local_path = re.sub('.*/uploads/', local_prefix, row[1])
        result.append((row[0], local_path.strip()))
    return result


def main(argv):
    """Print ``<id>, <path>`` for every candidate file found on the mirror.

    *argv* is the full argument vector: program name, IMC name and maximum
    age.  Returns 2 when the arguments are missing and 0 otherwise.
    """
    if len(argv) != 3:
        sys.stderr.write('usage: %s IMC MAXAGE\n' % argv[0])
        return 2
    if pg is None:
        raise ImportError('the PyGreSQL "pg" module is required')

    imc = argv[1]
    maxage = int(argv[2])

    db = pg.DB(user='php', dbname='active_%s' % imc)
    try:
        offset = 0
        records = get_next(db, imc, PAGE_SIZE, offset, maxage)
        while records:
            for article_id, fname in records:
                http_url = s2eye(fname)
                # Only documents present on the mirror are reported.
                if http_url and url_here(http_url):
                    print('%d, %s' % (article_id, fname))
            offset += PAGE_SIZE
            records = get_next(db, imc, PAGE_SIZE, offset, maxage)
    finally:
        # Release the database connection even when a probe fails.
        db.close()
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))