"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

__revision__ = "$Id: textwrap.py,v 1.35 2004/06/03 01:59:40 gward Exp $"

import re
import string

# Do the right thing with boolean values for all known Python versions
# (so this module can be copied to projects that don't depend on Python
# 2.3, e.g. Optik and Docutils).  The fallback goes through globals()
# because a literal "True = 1" is a syntax error on Python 3, where
# True and False are keywords.
try:
    True, False
except NameError:
    globals()['True'], globals()['False'] = 1, 0

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that in
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '

# Small compatibility shims so the module imports on both Python 2 and
# Python 3:
#   * Python 3 has no separate 'unicode' type (str is the text type);
#   * maketrans() lives on str in Python 3, in the string module in 2;
#   * string.lowercase was dropped in Python 3 (ascii_lowercase remains).
try:
    _unicode = unicode
except NameError:
    _unicode = str
_maketrans = getattr(str, 'maketrans', None) or string.maketrans
_lowercase = getattr(string, 'lowercase', None) or string.ascii_lowercase


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # Translation table used for plain 'str' input: maps every character
    # of _whitespace to a single space.
    whitespace_trans = _maketrans(_whitespace, ' ' * len(_whitespace))

    # Ordinal -> ordinal mapping used for unicode input.
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace
    del x       # don't leak the loop variable as a class attribute

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                            r'[^\s\w]*\w{2,}-(?=\w{2,})|'  # hyphenated words
                            r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')  # em-dash

    # XXX this is not locale- or charset-aware -- the lowercase set is
    # meant to be US-ASCII only (and therefore English-only).
    # The trailing \Z anchors the match to the end of the chunk; without
    # it, punctuation in the middle of a chunk ("www.python.org") would
    # be mistaken for the end of a sentence.
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % _lowercase)


    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        """Store the wrapping options; see the class docstring for
        the meaning of each one.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words


    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        r"""_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\tbar\n\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text


    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        # re.split() yields empty strings between adjacent separators;
        # drop them.  Build a real list (not a lazy filter object)
        # because callers index into and mutate the result.
        return [chunk for chunk in self.wordsep_re.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        r"""_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\nBar ...", _munge_whitespace()
        and _split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks)-1:
            if chunks[i+1] == " " and pat.search(chunks[i]):
                chunks[i+1] = "  "
                i += 2          # skip the space we just widened
            else:
                i += 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'chunks' and 'cur_line'
        are modified in place.
        """
        # Always leave room for at least one character so that the
        # caller's loop makes progress even when the indent eats the
        # whole width.
        space_left = max(width - cur_len, 1)

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(chunks[0][0:space_left])
            chunks[0] = chunks[0][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop(0))

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is consumed (emptied) in the process.
        Raises ValueError if 'self.width' is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if chunks[0].strip() == '' and lines:
                del chunks[0]

            while chunks:
                chunk_len = len(chunks[0])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop(0))
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[0]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)

def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)


# -- Loosely related functionality -------------------------------------

def dedent(text):
    r"""dedent(text : string) -> string

    Remove any whitespace that can be uniformly removed from the left
    of every line in `text`.  Tabs are expanded first, and lines that
    contain only whitespace are ignored when computing the common
    margin.

    This can be used e.g. to make triple-quoted strings line up with
    the left edge of screen/whatever, while still presenting it in the
    source code in indented form.

    For example:

        def test():
            # end first line with \ to avoid the empty line!
            s = '''\
            hello
              world
            '''
            print(repr(s))          # prints '    hello\n      world\n    '
            print(repr(dedent(s)))  # prints 'hello\n  world\n'
    """
    lines = text.expandtabs().split('\n')

    # The margin is the smallest indentation found on any line that has
    # real content; it stays None if there is no such line.
    margin = None
    for line in lines:
        content = line.lstrip()
        if not content:
            continue
        indent = len(line) - len(content)
        if margin is None:
            margin = indent
        else:
            margin = min(margin, indent)

    if margin is not None and margin > 0:
        lines = [line[margin:] for line in lines]

    return '\n'.join(lines)

# Generated by PyXR 0.9.4