Coverage for src / taipanstack / security / validators.py: 100%

153 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-12 21:18 +0000

1""" 

2Input validators for type-safe validation. 

3 

4Provides validation functions for common input types like email, 

5project names, URLs, etc. Validators raise ValueError on invalid input and TypeError when the argument is not of the expected type. 

6""" 

7 

8import re 

9import urllib.parse 

10from urllib.parse import urlsplit 

11 

# Named constants keep magic values out of the validators (PLR2004).

# Supported interpreter range: major version 3, minor version 10 or newer.
PYTHON_MAJOR_VERSION = 3
MIN_PYTHON_MINOR_VERSION = 10

# Upper bounds on the length of the strings being validated.
MAX_PYTHON_VERSION_LENGTH = 20
MAX_EMAIL_LOCAL_LENGTH = 64
MAX_EMAIL_DOMAIN_LENGTH = 255
MAX_URL_LENGTH = 2048

# Hostnames that are accepted by the TLD check even though they have no TLD.
LOCALHOST_DOMAINS = ("localhost", "127.0.0.1", "::1")

# Names that may not be used as a project name (compared in lower case).
PROJECT_NAME_RESERVED = frozenset(
    (
        "app",
        "bin",
        "build",
        "config",
        "core",
        "dist",
        "lib",
        "main",
        "settings",
        "setup",
        "site-packages",
        "src",
        "test",
        "tests",
    )
)

38 

39 

40def _validate_type( 

41 value: object, expected_type: type | tuple[type, ...], name: str 

42) -> None: 

43 """Validate input type. 

44 

45 Args: 

46 value: The value to check. 

47 expected_type: The expected type(s). 

48 name: Name of the variable for the error message. 

49 

50 Raises: 

51 TypeError: If value is not of the expected type. 

52 

53 """ 

54 if not isinstance(value, expected_type): 

55 type_name = ( 

56 expected_type.__name__ 

57 if isinstance(expected_type, type) 

58 else " | ".join(t.__name__ for t in expected_type) 

59 ) 

60 msg = f"{name} must be {type_name}, got {type(value).__name__}" 

61 raise TypeError(msg) 

62 

63 

64def _check_project_name_length(name: str, max_length: int) -> None: 

65 """Check project name length. 

66 

67 Args: 

68 name: The project name. 

69 max_length: Maximum allowed length. 

70 

71 Raises: 

72 ValueError: If length is invalid. 

73 

74 """ 

75 if not name: 

76 msg = "Project name cannot be empty" 

77 raise ValueError(msg) 

78 

79 if len(name) > max_length: 

80 msg = f"Project name exceeds maximum length of {max_length}" 

81 raise ValueError(msg) 

82 

83 

84def _build_project_name_pattern(allow_hyphen: bool, allow_underscore: bool) -> str: 

85 """Build the regex pattern for allowed characters.""" 

86 allowed = r"a-zA-Z0-9" 

87 if allow_hyphen: 

88 allowed += r"-" 

89 if allow_underscore: 

90 allowed += r"_" 

91 return rf"^[a-zA-Z][{allowed}]*\Z" 

92 

93 

94def _build_invalid_chars_msg(allow_hyphen: bool, allow_underscore: bool) -> str: 

95 """Build the error message for invalid characters.""" 

96 hyphen_msg = ", hyphens" if allow_hyphen else "" 

97 underscore_msg = ", underscores" if allow_underscore else "" 

98 return ( 

99 f"Project name contains invalid characters. " 

100 f"Allowed: letters, numbers{hyphen_msg}{underscore_msg}" 

101 ) 

102 

103 

def _check_project_name_chars(
    name: str, allow_hyphen: bool, allow_underscore: bool
) -> None:
    """Check that a project name uses only permitted characters.

    Args:
        name: The project name.
        allow_hyphen: Whether to allow hyphens.
        allow_underscore: Whether to allow underscores.

    Raises:
        ValueError: If the name does not start with a letter (this includes
            an empty name) or contains characters that are not allowed.

    """
    pattern = _build_project_name_pattern(allow_hyphen, allow_underscore)
    if re.match(pattern, name):
        return

    # Slice rather than index: an empty name then yields "" (not alphabetic)
    # and is reported as a ValueError instead of escaping as an IndexError.
    if not name[:1].isalpha():
        msg = "Project name must start with a letter"
        raise ValueError(msg)

    msg = _build_invalid_chars_msg(allow_hyphen, allow_underscore)
    raise ValueError(msg)

126 

127 

def _check_project_name_reserved(name: str) -> None:
    """Reject project names that collide with a reserved name.

    The comparison is done on the lower-cased name.

    Args:
        name: The project name.

    Raises:
        ValueError: If the lower-cased name is in ``PROJECT_NAME_RESERVED``.

    """
    lowered = name.lower()
    if lowered not in PROJECT_NAME_RESERVED:
        return

    msg = f"Project name '{name}' is reserved"
    raise ValueError(msg)

141 

142 

def validate_project_name(
    name: str,
    *,
    max_length: int = 100,
    allow_hyphen: bool = True,
    allow_underscore: bool = True,
) -> str:
    """Validate a project name and return it unchanged.

    The checks run in order: type, length, permitted characters, and
    finally the reserved-name list.

    Args:
        name: The project name to validate.
        max_length: Maximum allowed length.
        allow_hyphen: Allow hyphens in name.
        allow_underscore: Allow underscores in name.

    Returns:
        The validated project name.

    Raises:
        TypeError: If ``name`` is not a string.
        ValueError: If the name is empty, too long, contains characters
            that are not allowed, or is reserved.

    Example:
        >>> validate_project_name("my_project")
        'my_project'
        >>> validate_project_name("123project")
        Traceback (most recent call last):
            ...
        ValueError: Project name must start with a letter

    """
    _validate_type(name, str, "Project name")
    _check_project_name_length(name, max_length=max_length)
    _check_project_name_chars(
        name,
        allow_hyphen=allow_hyphen,
        allow_underscore=allow_underscore,
    )
    _check_project_name_reserved(name)

    return name

177 

178 

def _check_version_format(version: str) -> None:
    """Check that a version string is short, printable and shaped like ``X.Y``.

    Raises:
        ValueError: If the string is too long, contains a NUL or other
            non-printable character, is not ASCII, or is not two groups of
            digits separated by a single dot.

    """
    # Bound the length before any digits are handed to int() later on.
    if len(version) > MAX_PYTHON_VERSION_LENGTH:
        msg = "Version string exceeds maximum length"
        raise ValueError(msg)

    has_bad_chars = "\x00" in version or not version.isprintable()
    if has_bad_chars:
        msg = "Version contains invalid characters"
        raise ValueError(msg)

    # \d also matches non-ASCII digits, so ASCII is required before the regex.
    pattern = r"^\d+\.\d+\Z"
    if not (version.isascii() and re.match(pattern, version)):
        msg = f"Invalid version format: '{version}'. Use 'X.Y' format (e.g., '3.12')"
        raise ValueError(msg)

199 

200 

def _check_version_numbers(version: str) -> None:
    """Check that the major/minor numbers denote a supported Python version.

    Raises:
        ValueError: If the string does not split into exactly two integers,
            the major version is not ``PYTHON_MAJOR_VERSION``, or the minor
            version is below ``MIN_PYTHON_MINOR_VERSION``.

    """
    try:
        # Unpacking also fails (ValueError) when there are not two parts.
        major, minor = (int(part) for part in version.split("."))
    except ValueError as exc:
        msg = f"Invalid version numbers in '{version}'"
        raise ValueError(msg) from exc

    if major != PYTHON_MAJOR_VERSION:
        msg = f"Only Python 3.x is supported, got {major}.x"
        raise ValueError(msg)

    if minor >= MIN_PYTHON_MINOR_VERSION:
        return

    msg = (
        f"Python 3.{minor} is not supported. "
        f"Minimum is 3.{MIN_PYTHON_MINOR_VERSION}"
    )
    raise ValueError(msg)

219 

220 

def validate_python_version(version: str) -> str:
    """Validate a Python version string and return it unchanged.

    Args:
        version: Version string like "3.12" or "3.10".

    Returns:
        The validated version string.

    Raises:
        TypeError: If ``version`` is not a string.
        ValueError: If version format is invalid or unsupported.

    """
    _validate_type(version, str, "Version")

    # Format first, numbers second: the numeric check assumes "X.Y" input.
    for check in (_check_version_format, _check_version_numbers):
        check(version)

    return version

238 

239 

def _check_email_basics(email: str) -> None:
    """Check that an email is non-empty, bounded in length and printable.

    Raises:
        ValueError: If the email is empty, longer than the combined
            local-part and domain limits (plus one for ``@``), or contains
            a NUL or other non-printable character.

    """
    if not email:
        msg = "Email cannot be empty"
        raise ValueError(msg)

    # Local part + "@" + domain.
    longest_allowed = MAX_EMAIL_LOCAL_LENGTH + 1 + MAX_EMAIL_DOMAIN_LENGTH
    if len(email) > longest_allowed:
        msg = "Email length exceeds maximum allowed"
        raise ValueError(msg)

    is_clean = "\x00" not in email and email.isprintable()
    if not is_clean:
        msg = "Email contains invalid characters"
        raise ValueError(msg)

253 

254 

def _check_email_format(email: str) -> None:
    """Run the basic email checks, then match the overall address shape.

    Raises:
        ValueError: If a basic check fails or the address does not match
            the expected ``local@domain.tld`` shape.

    """
    _check_email_basics(email)

    # Simplified shape: local part, "@", domain, and an alphabetic final
    # label of at least two characters.
    pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\Z"
    if re.match(pattern, email) is None:
        msg = f"Invalid email format: {email}"
        raise ValueError(msg)

265 

266 

def _check_email_parts(email: str) -> None:
    """Enforce the individual length limits of the local part and domain.

    The address is split on its last ``@``.

    Raises:
        ValueError: If the local part or the domain exceeds its limit.

    """
    local_part, domain_part = email.rsplit("@", 1)

    # Checked in this order so the local-part error wins when both fail.
    limits = (
        ("local part", local_part, MAX_EMAIL_LOCAL_LENGTH),
        ("domain", domain_part, MAX_EMAIL_DOMAIN_LENGTH),
    )
    for label, text, limit in limits:
        if len(text) > limit:
            msg = f"Email {label} exceeds {limit} characters"
            raise ValueError(msg)

278 

279 

def validate_email(email: str) -> str:
    """Validate an email address and return it unchanged.

    The address is matched against a deliberately simple pattern and then
    the local part and domain are checked against their length limits.

    Args:
        email: The email address to validate.

    Returns:
        The validated email address.

    Raises:
        TypeError: If ``email`` is not a string.
        ValueError: If email format is invalid.

    """
    _validate_type(email, str, "Email")

    # Format first: the part check relies on the address containing "@".
    for check in (_check_email_format, _check_email_parts):
        check(email)

    return email

300 

301 

def _check_url_basics(url: str) -> None:
    """Check that a URL is a non-empty, bounded, printable string.

    Raises:
        TypeError: If ``url`` is not a string.
        ValueError: If the URL is empty, longer than ``MAX_URL_LENGTH``, or
            contains a NUL or other non-printable character.

    """
    _validate_type(url, str, "URL")

    if not url:
        msg = "URL cannot be empty"
        raise ValueError(msg)

    if len(url) > MAX_URL_LENGTH:
        msg = f"URL length exceeds maximum allowed length of {MAX_URL_LENGTH}"
        raise ValueError(msg)

    is_clean = "\x00" not in url and url.isprintable()
    if not is_clean:
        msg = "URL contains invalid characters"
        raise ValueError(msg)

317 

318 

319def _check_scheme( 

320 parsed: urllib.parse.SplitResult, 

321 allowed_schemes: tuple[str, ...], 

322) -> None: 

323 """Validate the URL scheme.""" 

324 if not parsed.scheme: 

325 msg = "URL must have a scheme (e.g., https://)" 

326 raise ValueError(msg) 

327 

328 if parsed.scheme not in allowed_schemes: 

329 msg = f"URL scheme '{parsed.scheme}' is not allowed. Allowed: {allowed_schemes}" 

330 raise ValueError(msg) 

331 

332 

def _check_tld(domain: str) -> None:
    """Require the domain to contain a dot and not end with one.

    Names listed in ``LOCALHOST_DOMAINS`` (compared in lower case) are
    exempt from this requirement.

    Raises:
        ValueError: If the domain has no dot or ends with a dot, and is
            not one of the localhost names.

    """
    if domain.lower() in LOCALHOST_DOMAINS:
        return

    if "." in domain and not domain.endswith("."):
        return

    msg = f"URL domain must have a TLD: {domain}"
    raise ValueError(msg)

340 

341 

def _check_url_domain(
    parsed: urllib.parse.SplitResult,
    allowed_schemes: tuple[str, ...],
    require_tld: bool,
) -> None:
    """Validate the scheme and host of a split URL.

    Args:
        parsed: The split URL to inspect.
        allowed_schemes: Schemes that are accepted.
        require_tld: Whether the host must also pass the TLD check.

    Raises:
        ValueError: If the scheme is missing or not allowed, the URL has no
            host, or the TLD check fails.

    """
    _check_scheme(parsed, allowed_schemes)

    host = parsed.hostname
    if not host:
        msg = "URL must have a domain"
        raise ValueError(msg)

    if not require_tld:
        return
    _check_tld(host)

356 

357 

def validate_url(
    url: str,
    *,
    allowed_schemes: tuple[str, ...] = ("http", "https"),
    require_tld: bool = True,
) -> str:
    """Validate a URL's format, scheme and host, and return it unchanged.

    Args:
        url: The URL to validate.
        allowed_schemes: Tuple of allowed URL schemes.
        require_tld: Whether to require a TLD in the domain.

    Returns:
        The validated URL.

    Raises:
        TypeError: If ``url`` is not a string.
        ValueError: If URL format is invalid.

    """
    _check_url_basics(url)

    try:
        parts = urlsplit(url)
        # Reading .port makes the parser validate the port; the value
        # itself is not needed.
        _ = parts.port
    except ValueError as exc:
        msg = f"Invalid URL format: {exc}"
        raise ValueError(msg) from exc

    _check_url_domain(parts, allowed_schemes, require_tld)

    return url