Coverage for src / taipanstack / security / validators.py: 100%
153 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-12 21:18 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-12 21:18 +0000
1"""
2Input validators for type-safe validation.
4Provides validation functions for common input types like email,
5project names, URLs, etc. Validators raise ValueError on invalid values and TypeError on non-string input.
6"""
8import re
9import urllib.parse
10from urllib.parse import urlsplit
# Named limits and lookup tables so the checks avoid magic values (PLR2004).
PYTHON_MAJOR_VERSION = 3
MIN_PYTHON_MINOR_VERSION = 10
MAX_PYTHON_VERSION_LENGTH = 20  # longest accepted version string, in characters
MAX_EMAIL_LOCAL_LENGTH = 64  # part before the "@"
MAX_EMAIL_DOMAIN_LENGTH = 255  # part after the "@"
MAX_URL_LENGTH = 2048
# Hosts that are exempt from the "domain must have a TLD" rule.
LOCALHOST_DOMAINS = ("localhost", "127.0.0.1", "::1")
# Project names that are refused; lookups use the lower-cased name.
PROJECT_NAME_RESERVED = frozenset(
    (
        "test",
        "tests",
        "src",
        "lib",
        "bin",
        "build",
        "dist",
        "setup",
        "config",
        "settings",
        "core",
        "main",
        "app",
        "site-packages",
    )
)
40def _validate_type(
41 value: object, expected_type: type | tuple[type, ...], name: str
42) -> None:
43 """Validate input type.
45 Args:
46 value: The value to check.
47 expected_type: The expected type(s).
48 name: Name of the variable for the error message.
50 Raises:
51 TypeError: If value is not of the expected type.
53 """
54 if not isinstance(value, expected_type):
55 type_name = (
56 expected_type.__name__
57 if isinstance(expected_type, type)
58 else " | ".join(t.__name__ for t in expected_type)
59 )
60 msg = f"{name} must be {type_name}, got {type(value).__name__}"
61 raise TypeError(msg)
64def _check_project_name_length(name: str, max_length: int) -> None:
65 """Check project name length.
67 Args:
68 name: The project name.
69 max_length: Maximum allowed length.
71 Raises:
72 ValueError: If length is invalid.
74 """
75 if not name:
76 msg = "Project name cannot be empty"
77 raise ValueError(msg)
79 if len(name) > max_length:
80 msg = f"Project name exceeds maximum length of {max_length}"
81 raise ValueError(msg)
84def _build_project_name_pattern(allow_hyphen: bool, allow_underscore: bool) -> str:
85 """Build the regex pattern for allowed characters."""
86 allowed = r"a-zA-Z0-9"
87 if allow_hyphen:
88 allowed += r"-"
89 if allow_underscore:
90 allowed += r"_"
91 return rf"^[a-zA-Z][{allowed}]*\Z"
94def _build_invalid_chars_msg(allow_hyphen: bool, allow_underscore: bool) -> str:
95 """Build the error message for invalid characters."""
96 hyphen_msg = ", hyphens" if allow_hyphen else ""
97 underscore_msg = ", underscores" if allow_underscore else ""
98 return (
99 f"Project name contains invalid characters. "
100 f"Allowed: letters, numbers{hyphen_msg}{underscore_msg}"
101 )
def _check_project_name_chars(
    name: str, allow_hyphen: bool, allow_underscore: bool
) -> None:
    """Reject project names containing characters outside the allowed set.

    The allowed set is defined by ``_build_project_name_pattern``: a leading
    letter followed by letters, digits and the enabled separators.

    Args:
        name: The project name.
        allow_hyphen: Whether to allow hyphens.
        allow_underscore: Whether to allow underscores.

    Raises:
        ValueError: If the name does not start with a letter, or contains
            characters that are not allowed. An empty name is reported as
            not starting with a letter.

    """
    pattern = _build_project_name_pattern(allow_hyphen, allow_underscore)
    if re.match(pattern, name):
        return

    # Slice rather than index: for an empty name ``name[:1]`` is "" (no
    # IndexError) and "".isalpha() is False, so the caller still gets the
    # documented ValueError even when the length check was not run first.
    if not name[:1].isalpha():
        raise ValueError("Project name must start with a letter")

    raise ValueError(_build_invalid_chars_msg(allow_hyphen, allow_underscore))
def _check_project_name_reserved(name: str) -> None:
    """Reject project names that collide with a reserved name.

    The comparison is done on the lower-cased name, so ``"Tests"`` is treated
    the same as ``"tests"``.

    Args:
        name: The project name.

    Raises:
        ValueError: If the lower-cased name is in ``PROJECT_NAME_RESERVED``.

    """
    lowered = name.lower()
    if lowered not in PROJECT_NAME_RESERVED:
        return
    # The message reports the name exactly as the caller supplied it.
    raise ValueError(f"Project name '{name}' is reserved")
def validate_project_name(
    name: str,
    *,
    max_length: int = 100,
    allow_hyphen: bool = True,
    allow_underscore: bool = True,
) -> str:
    """Check that ``name`` is usable as a project name and return it unchanged.

    The checks run in a fixed order (type, length, characters, reserved
    names); the first failing check determines the error that is raised.

    Args:
        name: The project name to validate.
        max_length: Maximum number of characters allowed.
        allow_hyphen: Whether hyphens may appear in the name.
        allow_underscore: Whether underscores may appear in the name.

    Returns:
        The validated project name.

    Raises:
        TypeError: If ``name`` is not a string.
        ValueError: If the name is empty, too long, contains invalid
            characters, or is reserved.

    Example:
        >>> validate_project_name("my_project")
        'my_project'
        >>> validate_project_name("123project")
        Traceback (most recent call last):
            ...
        ValueError: Project name must start with a letter

    """
    _validate_type(name, str, "Project name")
    _check_project_name_length(name, max_length=max_length)
    _check_project_name_chars(
        name,
        allow_hyphen=allow_hyphen,
        allow_underscore=allow_underscore,
    )
    _check_project_name_reserved(name)
    return name
def _check_version_format(version: str) -> None:
    """Verify that ``version`` is a short, printable, ASCII ``X.Y`` string.

    Args:
        version: The version string to inspect.

    Raises:
        ValueError: If the string is too long, contains NUL or other
            non-printable characters, or is not of the form ``digits.digits``.

    """
    # Bound the length first so later integer parsing never sees an
    # oversized digit string.
    if len(version) > MAX_PYTHON_VERSION_LENGTH:
        raise ValueError("Version string exceeds maximum length")

    has_bad_chars = "\x00" in version or not version.isprintable()
    if has_bad_chars:
        raise ValueError("Version contains invalid characters")

    # The ASCII test runs before the regex because ``\d`` alone would also
    # accept non-ASCII digits.
    well_formed = version.isascii() and re.match(r"^\d+\.\d+\Z", version) is not None
    if not well_formed:
        raise ValueError(
            f"Invalid version format: '{version}'. "
            "Use 'X.Y' format (e.g., '3.12')"
        )
def _check_version_numbers(version: str) -> None:
    """Verify that the major/minor numbers name a supported Python version.

    Args:
        version: A version string of the form ``X.Y``.

    Raises:
        ValueError: If the string does not split into exactly two integers,
            the major version is not ``PYTHON_MAJOR_VERSION``, or the minor
            version is below ``MIN_PYTHON_MINOR_VERSION``.

    """
    try:
        major, minor = [int(piece) for piece in version.split(".")]
    except ValueError as e:
        # Covers both non-numeric pieces and a wrong number of pieces.
        raise ValueError(f"Invalid version numbers in '{version}'") from e

    if major != PYTHON_MAJOR_VERSION:
        raise ValueError(f"Only Python 3.x is supported, got {major}.x")

    if minor < MIN_PYTHON_MINOR_VERSION:
        raise ValueError(
            f"Python 3.{minor} is not supported. "
            f"Minimum is 3.{MIN_PYTHON_MINOR_VERSION}"
        )
def validate_python_version(version: str) -> str:
    """Check that ``version`` names a supported Python version and return it.

    Args:
        version: Version string such as ``"3.12"`` or ``"3.10"``.

    Returns:
        The validated version string, unchanged.

    Raises:
        TypeError: If ``version`` is not a string.
        ValueError: If the format is invalid or the version is unsupported.

    """
    _validate_type(version, str, "Version")
    # Format is checked before the numbers are parsed.
    for check in (_check_version_format, _check_version_numbers):
        check(version)
    return version
def _check_email_basics(email: str) -> None:
    """Run the cheap sanity checks on an email address.

    Args:
        email: The email address to inspect.

    Raises:
        ValueError: If the address is empty, longer than the combined
            local-part and domain limits (plus one for the ``@``), or contains
            NUL or other non-printable characters.

    """
    if not email:
        raise ValueError("Email cannot be empty")

    longest_allowed = MAX_EMAIL_LOCAL_LENGTH + 1 + MAX_EMAIL_DOMAIN_LENGTH
    if len(email) > longest_allowed:
        raise ValueError("Email length exceeds maximum allowed")

    has_bad_chars = "\x00" in email or not email.isprintable()
    if has_bad_chars:
        raise ValueError("Email contains invalid characters")
def _check_email_format(email: str) -> None:
    """Check the overall shape of an email address.

    Runs the basic checks first, then matches the address against a
    simplified pattern.

    Args:
        email: The email address to inspect.

    Raises:
        ValueError: If a basic check fails or the address does not match
            the pattern.

    """
    _check_email_basics(email)

    # Simplified shape: local part, "@", domain, ".", alphabetic TLD of 2+ letters.
    email_shape = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\Z"
    if re.match(email_shape, email) is None:
        raise ValueError(f"Invalid email format: {email}")
def _check_email_parts(email: str) -> None:
    """Enforce the separate length limits on the local part and the domain.

    The address is split at its last ``@``.

    Args:
        email: The email address to inspect.

    Raises:
        ValueError: If the local part exceeds ``MAX_EMAIL_LOCAL_LENGTH`` or
            the domain exceeds ``MAX_EMAIL_DOMAIN_LENGTH`` characters.

    """
    local_part, domain_part = email.rsplit("@", 1)

    if len(local_part) > MAX_EMAIL_LOCAL_LENGTH:
        raise ValueError(
            f"Email local part exceeds {MAX_EMAIL_LOCAL_LENGTH} characters"
        )

    if len(domain_part) > MAX_EMAIL_DOMAIN_LENGTH:
        raise ValueError(f"Email domain exceeds {MAX_EMAIL_DOMAIN_LENGTH} characters")
def validate_email(email: str) -> str:
    """Check that ``email`` looks like an email address and return it.

    The address is matched against a deliberately simple pattern, then the
    local-part and domain length limits are applied.

    Args:
        email: The email address to validate.

    Returns:
        The validated email address, unchanged.

    Raises:
        TypeError: If ``email`` is not a string.
        ValueError: If the address is malformed or too long.

    """
    _validate_type(email, str, "Email")
    # Format must pass first: the part check relies on an "@" being present.
    for check in (_check_email_format, _check_email_parts):
        check(email)
    return email
def _check_url_basics(url: str) -> None:
    """Run the cheap sanity checks on a URL before it is parsed.

    Args:
        url: The URL to inspect.

    Raises:
        TypeError: If ``url`` is not a string.
        ValueError: If the URL is empty, longer than ``MAX_URL_LENGTH``, or
            contains NUL or other non-printable characters.

    """
    _validate_type(url, str, "URL")

    if not url:
        raise ValueError("URL cannot be empty")

    if len(url) > MAX_URL_LENGTH:
        raise ValueError(
            f"URL length exceeds maximum allowed length of {MAX_URL_LENGTH}"
        )

    has_bad_chars = "\x00" in url or not url.isprintable()
    if has_bad_chars:
        raise ValueError("URL contains invalid characters")
319def _check_scheme(
320 parsed: urllib.parse.SplitResult,
321 allowed_schemes: tuple[str, ...],
322) -> None:
323 """Validate the URL scheme."""
324 if not parsed.scheme:
325 msg = "URL must have a scheme (e.g., https://)"
326 raise ValueError(msg)
328 if parsed.scheme not in allowed_schemes:
329 msg = f"URL scheme '{parsed.scheme}' is not allowed. Allowed: {allowed_schemes}"
330 raise ValueError(msg)
def _check_tld(domain: str) -> None:
    """Require the host to contain a dot-separated final label.

    Hosts listed in ``LOCALHOST_DOMAINS`` (compared lower-cased) are exempt.

    Args:
        domain: The host name taken from the URL.

    Raises:
        ValueError: If the host has no dot or ends with a dot, and is not
            one of the exempt localhost names.

    """
    if domain.lower() in LOCALHOST_DOMAINS:
        return

    if "." in domain and not domain.endswith("."):
        return

    raise ValueError(f"URL domain must have a TLD: {domain}")
def _check_url_domain(
    parsed: urllib.parse.SplitResult,
    allowed_schemes: tuple[str, ...],
    require_tld: bool,
) -> None:
    """Validate the scheme and host of a split URL.

    Args:
        parsed: The split URL to inspect.
        allowed_schemes: Schemes that are accepted.
        require_tld: Whether the host must also pass the TLD check.

    Raises:
        ValueError: If the scheme is missing or not allowed, the URL has no
            host, or (when ``require_tld`` is true) the host fails the TLD
            check.

    """
    _check_scheme(parsed, allowed_schemes)

    host = parsed.hostname
    if not host:
        raise ValueError("URL must have a domain")

    if require_tld:
        _check_tld(host)
def validate_url(
    url: str,
    *,
    allowed_schemes: tuple[str, ...] = ("http", "https"),
    require_tld: bool = True,
) -> str:
    """Check that ``url`` is well-formed and uses an allowed scheme.

    Args:
        url: The URL to validate.
        allowed_schemes: Schemes that are accepted.
        require_tld: Whether the host must contain a TLD.

    Returns:
        The validated URL, unchanged.

    Raises:
        TypeError: If ``url`` is not a string.
        ValueError: If the URL is empty, too long, cannot be parsed, or has
            an invalid scheme, port or host.

    """
    _check_url_basics(url)

    try:
        parts = urlsplit(url)
        # The port is read only so that an invalid one raises ValueError here.
        _ = parts.port
    except ValueError as e:
        raise ValueError(f"Invalid URL format: {e}") from e

    _check_url_domain(parts, allowed_schemes, require_tld)
    return url