From 2ef5e265ee7ca39878b52e4628bdd3178fce6529 Mon Sep 17 00:00:00 2001 From: Floke Date: Fri, 27 Jun 2025 11:08:01 +0200 Subject: [PATCH] Add reindent.py script --- reindent.py | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 328 insertions(+), 13 deletions(-) diff --git a/reindent.py b/reindent.py index 6b543ca9..f6dadaac 100644 --- a/reindent.py +++ b/reindent.py @@ -1,18 +1,333 @@ -# reindent.py +#! /usr/bin/env python3 + +# Released to the public domain, by Tim Peters, 03 October 2000. + +"""reindent [-d][-r][-v] [ path ... ] + +-d (--dryrun) Dry run. Analyze, but don't make any changes to, files. +-r (--recurse) Recurse. Search for all .py files in subdirectories too. +-n (--nobackup) No backup. Does not make a ".bak" file before reindenting. +-v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. +-h (--help) Help. Print this usage information and exit. + +Change Python (.py) files to use 4-space indents and no hard tab characters. +Also trim excess spaces and tabs from ends of lines, and remove empty lines +at the end of files. Also ensure the last line ends with a newline. + +If no paths are given on the command line, reindent operates as a filter, +reading a single source file from standard input and writing the transformed +source to standard output. In this case, the -d, -r and -v flags are +ignored. + +You can pass one or more file and/or directory paths. When a directory +path, all .py files within the directory will be examined, and, if the -r +option is given, likewise recursively for subdirectories. + +If output is not to standard output, reindent overwrites files in place, +renaming the originals with a .bak extension. If it finds nothing to +change, the file is left alone. If reindent does change a file, the changed +file is a fixed-point for future runs (i.e., running reindent on the +resulting .py file won't change it again). + +The hard part of reindenting is figuring out what to do with comment +lines. So long as the input files get a clean bill of health from +tabnanny.py, reindent should do a good job. + +The backup file is a copy of the one that is being reindented. The ".bak" +file is generated with shutil.copy(), but some corner cases regarding +user/group and permissions could leave the backup file more readable than +you'd prefer. You can always use the --nobackup option to prevent this. +""" + +__version__ = "1" + +import tokenize +import os +import shutil import sys -in_path = sys.argv[1] -out_path = sys.argv[2] +verbose = False +recurse = False +dryrun = False +makebackup = True +# A specified newline to be used in the output (set by --newline option) +spec_newline = None -with open(in_path, 'r', encoding='utf-8') as f: - lines = f.readlines() -fixed = [] -for line in lines: - # Tabs → 4 Spaces, Strip trailing Spaces - new = line.expandtabs(4).rstrip() - fixed.append(new + '\n') +def usage(msg=None): + if msg is None: + msg = __doc__ + print(msg, file=sys.stderr) -with open(out_path, 'w', encoding='utf-8') as f: - f.writelines(fixed) -print(f"Neu eingerückt und gespeichert: {out_path}") + +def errprint(*args): + sys.stderr.write(" ".join(str(arg) for arg in args)) + sys.stderr.write("\n") + +def main(): + import getopt + global verbose, recurse, dryrun, makebackup, spec_newline + try: + opts, args = getopt.getopt(sys.argv[1:], "drnvh", + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) + except getopt.error as msg: + usage(msg) + return + for o, a in opts: + if o in ('-d', '--dryrun'): + dryrun = True + elif o in ('-r', '--recurse'): + recurse = True + elif o in ('-n', '--nobackup'): + makebackup = False + elif o in ('-v', '--verbose'): + verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] + elif o in ('-h', '--help'): + usage() + return + if not args: + r = Reindenter(sys.stdin) + r.run() + r.write(sys.stdout) + return + for arg in args: + check(arg) + + +def check(file): + if os.path.isdir(file) and not os.path.islink(file): + if verbose: + print("listing directory", file) + names = os.listdir(file) + for name in names: + fullname = os.path.join(file, name) + if ((recurse and os.path.isdir(fullname) and + not os.path.islink(fullname) and + not os.path.split(fullname)[1].startswith(".")) + or name.lower().endswith(".py")): + check(fullname) + return + + if verbose: + print("checking", file, "...", end=' ') + with open(file, 'rb') as f: + try: + encoding, _ = tokenize.detect_encoding(f.readline) + except SyntaxError as se: + errprint("%s: SyntaxError: %s" % (file, str(se))) + return + try: + with open(file, encoding=encoding) as f: + r = Reindenter(f) + except IOError as msg: + errprint("%s: I/O Error: %s" % (file, str(msg))) + return + + newline = spec_newline if spec_newline else r.newlines + if isinstance(newline, tuple): + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) + return + + if r.run(): + if verbose: + print("changed.") + if dryrun: + print("But this is a dry run, so leaving it alone.") + if not dryrun: + bak = file + ".bak" + if makebackup: + shutil.copyfile(file, bak) + if verbose: + print("backed up", file, "to", bak) + with open(file, "w", encoding=encoding, newline=newline) as f: + r.write(f) + if verbose: + print("wrote new", file) + return True + else: + if verbose: + print("unchanged.") + return False + + +def _rstrip(line, JUNK='\n \t'): + """Return line stripped of trailing spaces, tabs, newlines. + + Note that line.rstrip() instead also strips sundry control characters, + but at least one known Emacs user expects to keep junk like that, not + mentioning Barry by name or anything . + """ + + i = len(line) + while i > 0 and line[i - 1] in JUNK: + i -= 1 + return line[:i] + + +class Reindenter: + + def __init__(self, f): + self.find_stmt = 1 # next token begins a fresh stmt? + self.level = 0 # current indent level + + # Raw file lines. + self.raw = f.readlines() + + # File lines, rstripped & tab-expanded. Dummy at start is so + # that we can use tokenize's 1-based line numbering easily. + # Note that a line is all-blank iff it's "\n". + self.lines = [_rstrip(line).expandtabs() + "\n" + for line in self.raw] + self.lines.insert(0, None) + self.index = 1 # index into self.lines of next line + + # List of (lineno, indentlevel) pairs, one for each stmt and + # comment line. indentlevel is -1 for comment lines, as a + # signal that tokenize doesn't know what to do about them; + # indeed, they're our headache! + self.stats = [] + + # Save the newlines found in the file so they can be used to + # create output without mutating the newlines. + self.newlines = f.newlines + + def run(self): + tokens = tokenize.generate_tokens(self.getline) + for _token in tokens: + self.tokeneater(*_token) + # Remove trailing empty lines. + lines = self.lines + while lines and lines[-1] == "\n": + lines.pop() + # Sentinel. + stats = self.stats + stats.append((len(lines), 0)) + # Map count of leading spaces to # we want. + have2want = {} + # Program after transformation. + after = self.after = [] + # Copy over initial empty lines -- there's nothing to do until + # we see a line with *something* on it. + i = stats[0][0] + after.extend(lines[1:i]) + for i in range(len(stats) - 1): + thisstmt, thislevel = stats[i] + nextstmt = stats[i + 1][0] + have = getlspace(lines[thisstmt]) + want = thislevel * 4 + if want < 0: + # A comment line. + if have: + # An indented comment line. If we saw the same + # indentation before, reuse what it most recently + # mapped to. + want = have2want.get(have, -1) + if want < 0: + # Then it probably belongs to the next real stmt. + for j in range(i + 1, len(stats) - 1): + jline, jlevel = stats[j] + if jlevel >= 0: + if have == getlspace(lines[jline]): + want = jlevel * 4 + break + if want < 0: # Maybe it's a hanging + # comment like this one, + # in which case we should shift it like its base + # line got shifted. + for j in range(i - 1, -1, -1): + jline, jlevel = stats[j] + if jlevel >= 0: + want = have + (getlspace(after[jline - 1]) - + getlspace(lines[jline])) + break + if want < 0: + # Still no luck -- leave it alone. + want = have + else: + want = 0 + assert want >= 0 + have2want[have] = want + diff = want - have + if diff == 0 or have == 0: + after.extend(lines[thisstmt:nextstmt]) + else: + for line in lines[thisstmt:nextstmt]: + if diff > 0: + if line == "\n": + after.append(line) + else: + after.append(" " * diff + line) + else: + remove = min(getlspace(line), -diff) + after.append(line[remove:]) + return self.raw != self.after + + def write(self, f): + f.writelines(self.after) + + # Line-getter for tokenize. + def getline(self): + if self.index >= len(self.lines): + line = "" + else: + line = self.lines[self.index] + self.index += 1 + return line + + # Line-eater for tokenize. + def tokeneater(self, type, token, slinecol, end, line, + INDENT=tokenize.INDENT, + DEDENT=tokenize.DEDENT, + NEWLINE=tokenize.NEWLINE, + COMMENT=tokenize.COMMENT, + NL=tokenize.NL): + + if type == NEWLINE: + # A program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? + self.find_stmt = 1 + + elif type == INDENT: + self.find_stmt = 1 + self.level += 1 + + elif type == DEDENT: + self.find_stmt = 1 + self.level -= 1 + + elif type == COMMENT: + if self.find_stmt: + self.stats.append((slinecol[0], -1)) + # but we're still looking for a new stmt, so leave + # find_stmt alone + + elif type == NL: + pass + + elif self.find_stmt: + # This is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER. + self.find_stmt = 0 + if line: # not endmarker + self.stats.append((slinecol[0], self.level)) + + +# Count number of leading blanks. +def getlspace(line): + i, n = 0, len(line) + while i < n and line[i] == " ": + i += 1 + return i + + +if __name__ == '__main__': + main()