#!/usr/bin/python -W ignore

# linefix.py version 1.0
# copyright May 2003 by Mike Bleyer <mike @ hoc dot net>
#
# This program is released under the GNU GPL, version 2 or later.
# See http://www.gnu.org/licenses/gpl.html
#
# What it does: 
#
# This script hopefully solves the stupid line ending problem once and for all!
# Strips away ALL line separator garbage and replaces it with the current
# platform's line separator (dos=\r\n, unix=\n, mac=\r).
# In particular it can handle MIXED line separators in the same text file,
# which most if not all tools and text editors fail to handle properly
# which is why I end up with them in the first place!
# (even Python's readline() function cannot handle this properly)
#
# As an added bonus, the script can convert runs of spaces into tabs, a useful
# feature for source code. And don't tell me spaces are better, you've obviously
# never developed in a team.
#

import sys,os,string,re,getopt

# global variable defaults
# Number of consecutive spaces that FixStream converts into one tab;
# 0 (the default) leaves spaces untouched. Set by main() from -r/--replace.
spaceReplace = 0
# The literal run of spaceReplace spaces that gets swapped for a tab.
spaceString = ''
# Not used by FixStream; kept so the module attribute still exists for any
# external code that reads it.
line = ''
# Matches one line separator of any flavour: DOS (\r\n), Mac (\r) or Unix (\n).
# \r\n is listed first so a DOS ending is replaced once instead of twice.
separatorPat = re.compile('(?:\r\n)|\r|\n')

def FixStream(readfrom=sys.stdin, writeto=sys.stdout):
	"""Copy readfrom to writeto with every line separator normalised.

	Each chunk returned by readfrom.readline() has all \\r\\n, \\r and \\n
	sequences replaced by os.linesep. When the module-level spaceReplace is
	greater than zero, every occurrence of spaceString in the chunk is then
	replaced by a single tab.

	readfrom -- object with a readline() method that returns an empty
	            string at end of input
	writeto  -- object with a write() method
	"""
	while True:
		chunk = readfrom.readline()
		if not chunk:
			# readline() signals end of input with an empty string
			break
		fixed = separatorPat.sub(os.linesep, chunk)
		if spaceReplace > 0:
			# str.replace instead of string.replace(): the module-level
			# function no longer exists in Python 3
			fixed = fixed.replace(spaceString, '\t')
		writeto.write(fixed)


def _ReplaceFile(source, target):
	"""Move source over target, overwriting target if it already exists."""
	if hasattr(os, 'replace'):
		os.replace(source, target)
		return
	try:
		os.rename(source, target)
	except OSError:
		# Without os.replace, rename cannot overwrite an existing file on
		# Windows; on any other platform the failure is genuine.
		if os.name != 'nt':
			raise
		os.remove(target)
		os.rename(source, target)


def FixFile(filename=''):
	"""Rewrite a regular file in place with normalised line separators.

	The cleaned text is written by FixStream to a temporary file created in
	the same directory as filename. Only after that succeeds are the
	permission bits copied over and the temporary file moved onto filename,
	so a failure while processing leaves the original file untouched.

	filename -- path of the file to fix; must be a regular file and not a
	            symbolic link

	Writes an error message to stderr and exits with status 1 when filename
	is not a regular file, when the temporary file cannot be created, or
	when reading, writing or replacing fails with an OSError/IOError.
	"""
	import io
	import shutil
	import tempfile

	if not os.path.isfile(filename) or os.path.islink(filename):
		sys.stderr.write('ERROR: \"'+filename+'\" is not a regular file'+os.linesep)
		sys.exit(1)

	# Keep the temporary file next to the target so the final move never has
	# to cross a filesystem boundary (a bare filename means the current dir).
	dirPart = os.path.dirname(filename) or os.curdir
	try:
		handle, tmpName = tempfile.mkstemp(prefix=os.path.basename(filename), dir=dirPart)
	except (OSError, IOError):
		sys.stderr.write('ERROR: cannot create TMP file in directory \"'+dirPart+'\"'+os.linesep)
		sys.exit(1)

	replaced = False
	try:
		# newline='' switches off the I/O layer's own newline translation so
		# FixStream sees and emits the raw separators; latin-1 maps every byte
		# to exactly one character, so all other content round-trips unchanged.
		with io.open(handle, 'w', encoding='latin-1', newline='') as outfile:
			with io.open(filename, 'r', encoding='latin-1', newline='') as infile:
				FixStream(infile, outfile)
		# mkstemp creates the file readable by the owner only
		shutil.copymode(filename, tmpName)
		_ReplaceFile(tmpName, filename)
		replaced = True
	except (OSError, IOError):
		sys.stderr.write('ERROR: cannot write new file \"'+filename+'\"'+os.linesep)
		sys.exit(1)
	finally:
		if not replaced:
			# best-effort cleanup; the temporary file may already be gone
			try:
				os.remove(tmpName)
			except OSError:
				pass


def usage():
	"""Write the command synopsis and the option summary to stderr."""
	helpLines = (
		'Usage: '+sys.argv[0]+' [options] [ <directory> | <file1> <file2> ...]\n',
		'  This script removes all trailing line separator garbage in textfiles\n',
		'  and changes the line separator according to the current platform.\n',
		'  Options:\n',
		'    [ -h | --help ]  print this message\n',
		'    [ -r N | --replace=N ]  replace N spaces with a tab everywhere\n',
		'\n',
	)
	# one write per line of help text, in order
	for text in helpLines:
		sys.stderr.write(text)

def main():
	"""Parse the command line and fix the named files, directory or stdin.

	Options:
	  -h, --help           print the usage message and exit with status 0
	  -r N, --replace=N    store N in spaceReplace and N spaces in spaceString

	Arguments:
	  one directory   every regular file directly inside it is fixed
	  one or more     each argument is passed to FixFile
	  none            stdin is filtered to stdout by FixStream

	An unknown option or a non-integer N prints the usage message and exits
	with status 2.
	"""
	global spaceReplace, spaceString
	try:
		# the trailing '=' tells getopt that --replace takes a value
		opts, args = getopt.getopt(sys.argv[1:], "hr:", ['help', 'replace='])
	except getopt.GetoptError:
		# print help information and exit:
		usage()
		sys.exit(2)
	for o, a in opts:
		if o in ("-h", "--help"):
			usage()
			sys.exit(0)
		if o in ("-r", "--replace"):
			try:
				spaceReplace = int(a)
			except ValueError:
				usage()
				sys.exit(2)
			spaceString = ' '*spaceReplace
	if len(args)==1 and os.path.isdir(args[0]):
		# a single directory argument: do all files directly inside it
		for item in os.listdir(args[0]):
			path = os.path.join(args[0], item)
			# FixFile exits on anything that is not a regular file, so
			# subdirectories and symbolic links are skipped here
			if os.path.isfile(path) and not os.path.islink(path):
				FixFile(path)
	elif args:
		# assume each argument is a file and treat it individually
		for item in args:
			FixFile(item)
	else:
		# assume stdin and stdout
		FixStream()


# Run the command line interface only when executed as a script,
# not when the module is imported.
if '__main__' == __name__:
	main()

