Source code for Text.ParseWords

#!/usr/bin/env python3
# Generated by "pythonizer -aM PyModules/Text/ParseWords.pm" v1.024 run by SNOOPYJC on Thu Feb  2 11:48:41 2023
__author__ = """Joe Cool"""
__email__ = "snoopyjc@gmail.com"
__version__ = "1.024"
import builtins, perllib, re

def _bn(s):
    """Return "" when *s* is None, otherwise *s* itself, unchanged."""
    if s is None:
        return ""
    return s


def _str(s):
    """Return "" when *s* is None, otherwise ``str(s)``."""
    if s is None:
        return ""
    return str(s)


# Stack of saved perllib.WARNING values; parse_line pushes the current value
# on entry and pops it back into perllib.WARNING on exit.
_locals_stack = []


class FunctionReturn(Exception):
    """Exception whose first argument carries a function's return value.

    parse_line catches this exception and returns ``args[0]`` of the
    caught instance.
    """
# Register this module's Perl package name with perllib.
# NOTE(review): later statements assign attributes on `Text.ParseWords`
# although no `Text` name is imported in this file, so this call presumably
# creates that namespace -- confirm against perllib.init_package.
perllib.init_package("Text.ParseWords")
def _token_regex(delimiter):
    """Compile the pattern that removes one leading token from a line.

    Capture groups: 1/2 are the quote character and body of a double-quoted
    string, 3/4 the same for a single-quoted string, 5 is an unquoted run
    and 6 is the terminator of that run (the delimiter text, or an empty
    string at end of text or just before a quote).  *delimiter* is spliced
    in as regular-expression source with verbose mode switched off; None
    becomes an empty pattern.
    """
    # This pattern is optimised to be stack conservative on older perls.
    # Do not refactor without being careful and testing it on very long strings.
    # See Perl bug #42980 for an example of a stack busting input.
    # SNOOPYJC: I removed the (?>...) atomic groups because they are not
    # supported until python 3.11
    return re.compile(
        rf"""^
        (?:
            # double quoted string
            (")                             # $quote
            ([^\\"]*(?:\\.[^\\"]*)*)"       # $quoted
        |   # --OR--
            # singe quoted string
            (')                             # $quote
            ([^\\']*(?:\\.[^\\']*)*)'       # $quoted
        |   # --OR--
            # unquoted string
            (                               # $unquoted
                (?:\\.|[^\\"'])*?
            )
            # followed by
            (                               # $delim
                $(?!\n)                     # EOL
            |   # --OR--
                (?-x:{_bn(delimiter)})      # delimiter
            |   # --OR--
                (?!^)(?=["'])               # a quote
            )
        )""",
        re.X | re.S,
    )


def parse_line(*_args, wantarray=False):
    """Split one line into fields on a delimiter pattern, honouring quotes.

    Positional arguments (unpacked from ``*_args`` via perllib.list_of_n):
        delimiter: regular-expression source matching a field separator.
        keep: when true, quote characters and backslashes are left in the
            fields; the exact string "delimiters" additionally emits every
            matched delimiter as its own element.
        line: the text to split.

    Keyword arguments:
        wantarray: selects the failure value -- an empty perllib.Array when
            true, None otherwise.

    Returns:
        A perllib.Array of fields.  As in the Perl original, a line that
        ends with a delimiter contributes a final None element (the empty
        field after the last delimiter); shellwords relies on that marker
        to drop the field produced by trailing whitespace.  The failure
        value is returned when no token can be matched at the start of the
        remaining text (for example an unterminated quote).

    perllib.WARNING is set to 0 for the duration of the call and restored
    afterwards.  The most recent match object is stored in the module-level
    name ``_m``.
    """
    _locals_stack.append(perllib.WARNING)
    try:
        [delimiter, keep, line] = perllib.list_of_n(_args, 3)
        word = None  # Perl: undef until the first token is appended
        pieces = perllib.Array()
        perllib.WARNING = 0
        token_re = None  # compiled on first use, then reused for every token

        def _remember_match(_m_):
            # Keep the match for the group() lookups below and delete the
            # matched text from the line.
            global _m
            _m = _m_
            return ""

        while len(_str(line)):
            if token_re is None:
                token_re = _token_regex(delimiter)
            result = perllib.substitute_and_count(
                token_re, _remember_match, count=1, var=_str(line)
            )
            line = result[0]
            if not result[1]:
                # Nothing could be consumed: give up on the whole line.
                return perllib.Array() if wantarray else None

            [quote, quoted, unquoted, delim] = perllib.list_of_n(
                perllib.flatten(
                    (
                        (
                            (_m.group(1), _m.group(2))
                            if _m.group(1)
                            else (_m.group(3), _m.group(4))
                        ),
                        _m.group(5),
                        _m.group(6),
                    )
                ),
                4,
            )
            if quote is None and not len(_str(unquoted)) and not len(_str(delim)):
                return perllib.Array() if wantarray else None

            if keep:
                quoted = f"{_bn(quote)}{_bn(quoted)}{_bn(quote)}"
            else:
                # Drop the backslash from every escaped character.
                unquoted = re.sub(
                    re.compile(r"\\(.)", re.S), r"\g<1>", _str(unquoted), count=0
                )
                if quote is not None:
                    if _str(quote) == '"':
                        quoted = re.sub(
                            re.compile(r"\\(.)", re.S), r"\g<1>", _str(quoted), count=0
                        )
                    if Text.ParseWords.PERL_SINGLE_QUOTE_v and _str(quote) == "'":
                        quoted = re.sub(
                            re.compile(r"\\([\\'])"), r"\g<1>", _str(quoted), count=0
                        )

            # The Perl original also appended substr($line, 0, 0) here to keep
            # the result tainted; that always contributes "" and is omitted.
            word = _str(word) + (_str(quoted) if quote is not None else _str(unquoted))

            if len(_str(delim)):
                pieces.append(word)
                if _str(keep) == "delimiters":
                    pieces.append(delim)
                # Perl: undef $word.  A None pushed below marks the empty
                # field that follows a trailing delimiter.
                word = None
            if not len(_str(line)):
                pieces.append(word)
        return pieces
    except FunctionReturn as _r:
        return _r.args[0]
    finally:
        perllib.WARNING = _locals_stack.pop()
# Publish parse_line as an attribute of the Text.ParseWords namespace.
Text.ParseWords.parse_line = parse_line
# SNOOPYJC: I removed this deprecated function because it doesn't Pythonize
# correctly.  The omitted Perl sub was old_shellwords, whose documented usage
# in the Perl source was:
#     @words = old_shellwords($line);
#     @words = old_shellwords(@lines);
#     @words = old_shellwords();      # defaults to $_ (and clobbers it)
# It joined its arguments, stripped leading whitespace, then repeatedly
# consumed double-quoted, single-quoted, backslash-escaped and bare runs into
# fields, carping "Unmatched double quote" / "Unmatched single quote" and
# returning an empty list on an unterminated quote.  The full Perl text is in
# the ParseWords.pm this file was generated from.
def nested_quotewords(*_args, wantarray=False):
    """Parse several lines, returning one array of words per input line.

    Positional arguments (unpacked from ``*_args``):
        delim: delimiter pattern handed to parse_line.
        keep: keep flag handed to parse_line.
        *lines: the lines to parse.

    Keyword arguments:
        wantarray: selects the failure value -- an empty perllib.Array when
            true, None otherwise.

    Returns:
        A perllib.Array whose i-th element is the perllib.Array of words
        parsed from the i-th line.  If a non-empty line produces no words
        the failure value is returned instead.
    """
    [delim, keep, *lines] = perllib.list_of_at_least_n(_args, 2)
    lines = perllib.Array(lines)
    allwords = perllib.Array()
    for i, line_l in enumerate(lines):
        # Ask parse_line for its list-context result, as quotewords and
        # shellwords do, so a failed parse is an empty array rather than None.
        allwords[i] = perllib.Array(parse_line(delim, keep, line_l, wantarray=True))
        if not allwords[i] and len(_str(line_l)):
            return perllib.Array() if wantarray else None
    return allwords
# Publish nested_quotewords as an attribute of the Text.ParseWords namespace.
Text.ParseWords.nested_quotewords = nested_quotewords
def quotewords(*_args, wantarray=False):
    """Parse several lines and return all of their words in one flat array.

    Positional arguments (unpacked from ``*_args``):
        delim: delimiter pattern handed to parse_line.
        keep: keep flag handed to parse_line.
        *lines: the lines to parse.

    Keyword arguments:
        wantarray: selects the failure value -- an empty perllib.Array when
            true, None otherwise.

    Returns:
        A perllib.Array with the words of every line, in order.  If a
        non-empty line produces no words the failure value is returned
        instead.
    """
    [delim, keep, *lines] = perllib.list_of_at_least_n(_args, 2)
    lines = perllib.Array(lines)
    allwords = perllib.Array()
    for line_l in lines:
        words = perllib.Array(parse_line(delim, keep, line_l, wantarray=True))
        if not words and len(_str(line_l)):
            return perllib.Array() if wantarray else None
        allwords.extend(words)
    return allwords
# Publish quotewords as an attribute of the Text.ParseWords namespace.
Text.ParseWords.quotewords = quotewords
def shellwords(*_args, wantarray=False):
    """Split each argument into whitespace-separated words, honouring quotes.

    Every positional argument is treated as one line.  Leading whitespace
    is removed, the line is split with parse_line on the pattern ``\\s+``
    with quotes and backslashes stripped (keep = 0), and a trailing None
    word is discarded.

    Keyword arguments:
        wantarray: selects the failure value -- an empty perllib.Array when
            true, None otherwise.

    Returns:
        A perllib.Array with the words of every line, in order.  If a line
        that is non-empty after stripping produces no words the failure
        value is returned instead.
    """
    allwords = perllib.Array()
    for line_l in _args:
        line_l = re.sub(r"^\s+", "", _str(line_l), count=1)
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        words = perllib.Array(parse_line(r"\s+", 0, line_l, wantarray=True))
        if words and words.get(len(words) - 1) is None:
            words.pop()
        if not words and len(line_l):
            return perllib.Array() if wantarray else None
        allwords.extend(words)
    return allwords
# Publish shellwords as an attribute of the Text.ParseWords namespace.
Text.ParseWords.shellwords = shellwords

# Package variable read by parse_line: when true, \\ and \' inside
# single-quoted fields are unescaped.  Initialised to "" (false).
Text.ParseWords.PERL_SINGLE_QUOTE_v = perllib.init_global(
    "Text.ParseWords", "PERL_SINGLE_QUOTE_v", ""
)

builtins.__PACKAGE__ = "Text.ParseWords"
# SKIPPED: use strict;
perllib.WARNING = 1
# SKIPPED: require 5.006;
Text.ParseWords.VERSION_v = "3.31"
# SKIPPED: use Exporter;
Text.ParseWords.ISA_a = "Exporter".split()
Text.ParseWords.EXPORT_a = "shellwords quotewords nested_quotewords parse_line".split()
# SNOOPYJC our @EXPORT_OK = qw(old_shellwords);
# old_shellwords was not ported, so nothing is exported on request.
Text.ParseWords.EXPORT_OK_a = perllib.Array()