Selfie
Loading...
Searching...
No Matches
Literals.py
Go to the documentation of this file.
1import io
2import re
3from abc import abstractmethod
4from enum import Enum, auto
5from typing import Any, Optional, Protocol, TypeVar
6
7from .EscapeLeadingWhitespace import EscapeLeadingWhitespace
8
9T = TypeVar("T")
10
11
class Language(Enum):
    """Source languages for which literals can be encoded and parsed."""

    PYTHON = auto()

    @classmethod
    def from_filename(cls, filename: str) -> "Language":
        """Return the language indicated by the extension of *filename*.

        Args:
            filename: A file name or path; only the text after the last "."
                is inspected.

        Returns:
            ``Language.PYTHON`` when the extension is ``py``.

        Raises:
            ValueError: If the name has no extension or the extension is not
                recognized.
        """
        # rpartition leaves `dot` empty when the name contains no "." at all,
        # so an extensionless file that happens to be called "py" is not
        # mistaken for a Python source file.
        _, dot, extension = filename.rpartition(".")
        if dot and extension == "py":
            return cls.PYTHON
        raise ValueError(f"Unknown language for file {filename}")
22
23
def __init__(self, expected: Optional[T], actual: T, fmt: "LiteralFormat") -> None:
    """Store the expected value, the actual value and the literal format.

    Args:
        expected: Stored as ``self.expected``; may be ``None``.
        actual: Stored as ``self.actual``.
        fmt: Stored as ``self.format``.
    """
    self.expected, self.actual, self.format = expected, actual, fmt
29
30
class LiteralFormat(Protocol[T]):
    """Protocol for turning a value of type ``T`` into source text and back."""

    @abstractmethod
    def encode(
        self,
        value: T,
        language: Language,
        encoding_policy: "EscapeLeadingWhitespace",
    ) -> str:
        """Return *value* rendered as text for *language*.

        This base version always raises ``NotImplementedError``.
        """
        raise NotImplementedError("Subclasses must implement the encode method")

    @abstractmethod
    def parse(self, string: str, language: Language) -> T:
        """Return the value described by *string* for *language*.

        This base version always raises ``NotImplementedError``.
        """
        raise NotImplementedError("Subclasses must implement the parse method")
41
42
43MAX_RAW_NUMBER = 1000
44PADDING_SIZE = len(str(MAX_RAW_NUMBER)) - 1
45
46
47def _encode_int_underscores(buffer: io.StringIO, value: int) -> str:
48 if value >= MAX_RAW_NUMBER:
49 mod = value % MAX_RAW_NUMBER
50 left_padding = PADDING_SIZE - len(str(mod))
51 _encode_int_underscores(buffer, value // MAX_RAW_NUMBER)
52 buffer.write("_")
53 buffer.write("0" * left_padding)
54 buffer.write(str(mod))
55 return buffer.getvalue()
56 elif value < 0:
57 buffer.write("-")
58 _encode_int_underscores(buffer, abs(value))
59 return buffer.getvalue()
60 else:
61 buffer.write(str(value))
62 return buffer.getvalue()
63
64
65TRIPLE_QUOTE = '"""'
66
67
def encode(
    self, value: str, language: Language, encoding_policy: EscapeLeadingWhitespace
) -> str:
    """Render *value* as a string literal for *language*.

    A value containing a newline is handed to ``encodeMultiPython`` together
    with *encoding_policy*; any other value goes to ``_encodeSinglePython``.

    Raises:
        NotImplementedError: If *language* is not ``Language.PYTHON``.
    """
    if language != Language.PYTHON:
        raise NotImplementedError(
            f"Encoding for language {language} is not implemented."
        )
    if "\n" in value:
        return self.encodeMultiPython(value, encoding_policy)
    return self._encodeSinglePython(value)
81
def parse(self, string: str, language: Language) -> str:
    """Parse the string literal *string* written in *language*.

    Text that starts with a triple quote is handed to ``parseMultiPython``;
    anything else goes to ``_parseSinglePython``.

    Raises:
        NotImplementedError: If *language* is not ``Language.PYTHON``.
    """
    if language != Language.PYTHON:
        # This is the parse path, so the message must say "Parsing"; it
        # previously repeated the wording used by ``encode``.
        raise NotImplementedError(
            f"Parsing for language {language} is not implemented."
        )
    if string.startswith(TRIPLE_QUOTE):
        return self.parseMultiPython(string)
    return self._parseSinglePython(string)
92
93 def _encodeSinglePython(self, value: str) -> str:
94 source = io.StringIO()
95 source.write('"')
96 for char in value:
97 if char == "\b":
98 source.write("\\b")
99 elif char == "\n":
100 source.write("\\n")
101 elif char == "\r":
102 source.write("\\r")
103 elif char == "\t":
104 source.write("\\t")
105 elif char == '"':
106 source.write('\\"')
107 elif char == "\\":
108 source.write("\\\\")
109 elif self._is_control_char(char):
110 source.write("\\u" + str(ord(char)).zfill(4))
111 else:
112 source.write(char)
113 source.write('"')
114 return source.getvalue()
115
116 def _is_control_char(self, c: str) -> bool:
117 return c in "\u0000\u001f" or c == "\u007f"
118
119 # combined logic from parseSingleJava and parseSingleJavaish
120 def _parseSinglePython(self, source_with_quotes: str) -> str:
121 assert source_with_quotes.startswith('"')
122 assert source_with_quotes.endswith('"')
123 source = source_with_quotes[1:-1]
124 to_unescape = self.inline_backslashes(source) # changed from inline_dollar
125 return self._unescape_python(to_unescape)
126
128 self, arg: str, escape_leading_whitespace: EscapeLeadingWhitespace
129 ) -> str:
130 escape_backslashes = arg.replace("\\", "\\\\")
131 escape_triple_quotes = escape_backslashes.replace(TRIPLE_QUOTE, '\\"\\"\\"')
132
133 def protect_trailing_whitespace(line: str) -> str:
134 if line.endswith(" "):
135 return line[:-1] + "\\u0020"
136 elif line.endswith("\t"):
137 return line[:-1] + "\\t"
138 else:
139 return line
140
141 lines = escape_triple_quotes.splitlines()
142 protect_whitespace = "\n".join(
143 escape_leading_whitespace.escape_line(
144 protect_trailing_whitespace(line), "\\u0020", "\\t"
145 )
146 for line in lines
147 )
148
149 return f"{TRIPLE_QUOTE}{protect_whitespace}{TRIPLE_QUOTE}"
150
151 _char_literal_pattern = re.compile(r"""\{'(\\?.)'\}""")
152
def inline_backslashes(self, source: str) -> str:
    """Replace every match of ``_char_literal_pattern`` in *source*.

    Each match is replaced by the character its captured group denotes: a
    single captured character stands for itself, and a backslash followed by
    one of ``t``, ``b``, ``n``, ``r``, ``'`` or a backslash stands for the
    corresponding character.

    Raises:
        ValueError: If a captured group is any other sequence.
    """
    escapes = {
        "t": "\t",
        "b": "\b",
        "n": "\n",
        "r": "\r",
        "'": "'",
        "\\": "\\",
    }

    def expand(match: "re.Match[str]") -> str:
        literal = match.group(1)
        if len(literal) == 1:
            return literal
        is_known_escape = (
            len(literal) == 2 and literal[0] == "\\" and literal[1] in escapes
        )
        if not is_known_escape:
            raise ValueError(f"Unknown character literal {literal}")
        return escapes[literal[1]]

    return self._char_literal_pattern.sub(expand, source)
178
179 def _unescape_python(self, source: str) -> str:
180 value = io.StringIO()
181 i = 0
182 while i < len(source):
183 c = source[i]
184 if c == "\\":
185 i += 1
186 c = source[i]
187 if c == '"':
188 value.write('"')
189 elif c == "\\":
190 value.write("\\")
191 elif c == "b":
192 value.write("\b")
193 elif c == "f":
194 value.write("\f")
195 elif c == "n":
196 value.write("\n")
197 elif c == "r":
198 value.write("\r")
199 elif c == "s":
200 value.write(" ")
201 elif c == "t":
202 value.write("\t")
203 elif c == "u":
204 code = int(source[i + 1 : i + 5], 16)
205 value.write(chr(code))
206 i += 4
207 else:
208 raise ValueError(f"Unknown escape sequence {c}")
209 else:
210 value.write(c)
211 i += 1
212 return value.getvalue()
213
def parseMultiPython(self, source_with_quotes: str) -> str:
    """Parse a triple-quoted literal back into its text.

    The leading and trailing ``TRIPLE_QUOTE`` are removed and the remainder
    is decoded with ``_unescape_python``.

    Args:
        source_with_quotes: The literal, which must begin and end with
            ``TRIPLE_QUOTE``.
    """
    assert source_with_quotes.startswith(TRIPLE_QUOTE)
    assert source_with_quotes.endswith(TRIPLE_QUOTE)
    quote_len = len(TRIPLE_QUOTE)
    body = source_with_quotes[quote_len:-quote_len]
    return self._unescape_python(body)
220
221
224 self,
225 value: Any,
226 language: Language, # noqa: ARG002
227 encoding_policy: EscapeLeadingWhitespace, # noqa: ARG002
228 ) -> str:
229 if isinstance(value, int):
230 return _encode_int_underscores(io.StringIO(), value)
231 else:
232 return repr(value)
233
def parse(self, string: str, language: Language) -> Any:
    """Parsing is not supported here; always raises ``NotImplementedError``."""
    raise NotImplementedError
236
237
class TodoStub(Enum):
    """Placeholder kinds that can be wrapped in a ``LiteralValue``."""

    to_match_disk = auto()
    to_be_file = auto()

    def create_literal(self):
        """Return a ``LiteralValue`` with no expected value and this member as actual.

        The literal's format is a new ``LiteralTodoStub`` instance.
        """
        stub_format = LiteralTodoStub()
        return LiteralValue(None, self, stub_format)
244
245
248 self,
249 value: TodoStub,
250 language: Language,
251 encoding_policy: EscapeLeadingWhitespace,
252 ) -> str:
253 raise NotImplementedError
254
def parse(self, string: str, language: Language) -> TodoStub:
    """Parsing is not supported here; always raises ``NotImplementedError``."""
    raise NotImplementedError
"Language" from_filename(cls, str filename)
Definition Literals.py:16
str encode(self, T value, Language language, "EscapeLeadingWhitespace" encoding_policy)
Definition Literals.py:35
T parse(self, str string, Language language)
Definition Literals.py:39
Any parse(self, str string, Language language)
Definition Literals.py:234
str encode(self, Any value, Language language, EscapeLeadingWhitespace encoding_policy)
Definition Literals.py:228
str _parseSinglePython(self, str source_with_quotes)
Definition Literals.py:120
str parseMultiPython(self, str source_with_quotes)
Definition Literals.py:214
str inline_backslashes(self, str source)
Definition Literals.py:153
str encodeMultiPython(self, str arg, EscapeLeadingWhitespace escape_leading_whitespace)
Definition Literals.py:129
str _unescape_python(self, str source)
Definition Literals.py:179
bool _is_control_char(self, str c)
Definition Literals.py:116
str _encodeSinglePython(self, str value)
Definition Literals.py:93
str parse(self, str string, Language language)
Definition Literals.py:82
str encode(self, str value, Language language, EscapeLeadingWhitespace encoding_policy)
Definition Literals.py:71
str encode(self, TodoStub value, Language language, EscapeLeadingWhitespace encoding_policy)
Definition Literals.py:252
TodoStub parse(self, str string, Language language)
Definition Literals.py:255
None __init__(self, Optional[T] expected, T actual, "LiteralFormat" fmt)
Definition Literals.py:25
str _encode_int_underscores(io.StringIO buffer, int value)
Definition Literals.py:47