#!/usr/bin/python import string,urllib,re,os, filecmp, time # gets the comics from various sources, puts them into appropriate directories # run it once a day to get them all # released under the GPL, http://www.gnu.org/licenses/gpl.html # where to put them, change this to your liking, then run the script target_dir="D:\share\comic" # dictionary of comics to get, adjust as necessary # name value pairs indicate comic name, web site and regexp to grab the gif comic_dict={'dilbert': ('http://www.dilbert.com/',re.compile(r'

Comix\n

Comix

\n') files = os.listdir(starting_dir) for file in files: if string.find(file,'.gif')>0: indexfile.write('

# NOTE(review): this line began with the orphaned tail of makeindex().  The
# markup inside that function's string literals was lost before review, so
# the remnant is preserved verbatim below instead of being reconstructed:
#     \n') indexfile.write('\n') indexfile.close()


def getcomix():
    """Download today's image for every comic listed in ``comic_dict``.

    For each entry the comic's web page is fetched and scanned line by line
    with the entry's compiled regular expression.  The first match supplies
    the image path (group 1 + group 2; group 2 is also used as the suffix of
    the local file name).  The image is downloaded to
    ``<target_dir>/<comic>/today<suffix>`` and then moved to
    ``<target_dir>/<comic>/<year>/<week>/<weekday><suffix>``, after which
    ``makeindex`` is called on that week directory.

    A comic whose page or image cannot be downloaded is reported and
    skipped.  The process exits with status 1 if ``target_dir`` cannot be
    created, and with status 0 as soon as a page yields no matching image.
    """
    # ``sys`` is not among the module-level imports, so bring it into scope
    # here; without it both exit paths below raise NameError.
    import sys

    try:
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
    except OSError:
        sys.stderr.write('Error: Could not create target directory \"'+target_dir+'\".')
        sys.exit(1)

    for comic in comic_dict:
        print("###\nRetrieving: "+comic)

        comic_dir = os.path.join(target_dir, comic)
        if not os.path.isdir(comic_dir):
            os.makedirs(comic_dir)

        comic_url, comic_rpat = comic_dict[comic]

        # Fetch the page.  On failure move on to the next comic: falling
        # through would scan an undefined (or the previous comic's) page.
        print(" getting "+comic_url)
        try:
            url_handle = urllib.urlopen(comic_url)
            try:
                html_content = url_handle.readlines()
            finally:
                url_handle.close()
        except Exception:
            print("Error retrieving html.")
            continue

        # The first line matching the pattern gives the image path.
        comic_path = ""
        suffix = ""
        for line in html_content:
            match = comic_rpat.search(line)
            if match:
                suffix = match.group(2)
                comic_path = match.group(1)+suffix
                break
        if comic_path == "":
            # Stops the whole run, as the message says.
            print("No matching image found. Quit.")
            sys.exit(0)

        # Download under a temporary name first.
        print(" image: "+comic_path)
        today_file = os.path.join(comic_dir, 'today'+suffix)
        try:
            urllib.urlretrieve(comic_url+comic_path, today_file)
        except IOError:
            print("Error retrieving image.")
            # Nothing was downloaded, so there is nothing to move or index.
            continue

        # Read the clock once so year/week/weekday cannot straddle midnight.
        now = time.localtime()
        year = time.strftime('%Y', now)
        week = time.strftime('%U', now)
        dow = time.strftime('%w', now)
        local_pathname = os.path.join(comic_dir, year, week)
        local_filename = os.path.join(local_pathname, dow+suffix)
        if not os.path.isdir(local_pathname):
            os.makedirs(local_pathname)
        # os.rename() refuses to overwrite an existing file on Windows, so a
        # second run on the same day would fail without this.
        if os.path.exists(local_filename):
            os.remove(local_filename)
        os.rename(today_file, local_filename)
        makeindex(local_pathname)


def removeduplicates(dirpath):
    """Delete files directly inside ``dirpath`` that duplicate another one.

    Regular files are compared pairwise by content.  Of each set of
    identical files the one whose name sorts first is kept and the others
    are removed; each removal is reported on stdout.  Subdirectories are
    left untouched, and a ``dirpath`` that is not a directory is ignored.
    """
    if not os.path.isdir(dirpath):
        return

    # Sorted so that the surviving copy does not depend on listdir() order.
    candidates = sorted(os.path.join(dirpath, name)
                        for name in os.listdir(dirpath))
    flist = [path for path in candidates if os.path.isfile(path)]

    for fname in flist:
        if not os.path.exists(fname):
            # Already deleted as a duplicate of an earlier file.
            continue
        for cfile in flist:
            if cfile == fname or not os.path.exists(cfile):
                continue
            # shallow=False forces a byte comparison.  The default only
            # checks size and mtime when they agree, which is too weak a
            # test before deleting a file.
            if not filecmp.cmp(fname, cfile, shallow=False):
                continue
            print(" "+fname+": is equal to: "+cfile)
            try:
                os.remove(cfile)
            except OSError:
                # Best effort: report and keep going.
                print(" could not remove: "+cfile)


if __name__ == "__main__":
    getcomix()