Coverage for src / taipanstack / security / validators.py: 100%

108 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-23 14:54 +0000

"""
Input validators for type-safe validation.

Provides validation functions for common input types like email,
project names, URLs, etc. Validators raise ValueError on invalid input
and TypeError when the argument is not of the expected type.
"""

7 

8import re 

9from urllib.parse import urlparse 

10 

# Named limits and lookup tables used by the validators in this module.
# They are kept as constants so the checks contain no magic values (PLR2004).

# Supported interpreter range: the major version must equal 3 and the
# minor version must be at least 10.
PYTHON_MAJOR_VERSION = 3
MIN_PYTHON_MINOR_VERSION = 10

# Upper bounds applied to the two halves of an email address.
MAX_EMAIL_LOCAL_LENGTH = 64
MAX_EMAIL_DOMAIN_LENGTH = 255

# Hosts that are exempt from the "domain must have a TLD" rule for URLs.
LOCALHOST_DOMAINS = ("localhost", "127.0.0.1", "::1")

# Names rejected as project names; the lookup lower-cases the candidate
# first, so every entry here must be lower case.
PROJECT_NAME_RESERVED = frozenset(
    (
        # Test and source layout directories
        "test",
        "tests",
        "src",
        "lib",
        "bin",
        # Packaging artefacts
        "build",
        "dist",
        "setup",
        "site-packages",
        # Generic application module names
        "config",
        "settings",
        "core",
        "main",
        "app",
    )
)

35 

36 

37def _validate_type( 

38 value: object, expected_type: type | tuple[type, ...], name: str 

39) -> None: 

40 """Validate input type. 

41 

42 Args: 

43 value: The value to check. 

44 expected_type: The expected type(s). 

45 name: Name of the variable for the error message. 

46 

47 Raises: 

48 TypeError: If value is not of the expected type. 

49 

50 """ 

51 if not isinstance(value, expected_type): 

52 type_name = ( 

53 expected_type.__name__ 

54 if isinstance(expected_type, type) 

55 else " | ".join(t.__name__ for t in expected_type) 

56 ) 

57 msg = f"{name} must be {type_name}, got {type(value).__name__}" 

58 raise TypeError(msg) 

59 

60 

61def _check_project_name_length(name: str, max_length: int) -> None: 

62 """Check project name length. 

63 

64 Args: 

65 name: The project name. 

66 max_length: Maximum allowed length. 

67 

68 Raises: 

69 ValueError: If length is invalid. 

70 

71 """ 

72 if not name: 

73 msg = "Project name cannot be empty" 

74 raise ValueError(msg) 

75 

76 if len(name) > max_length: 

77 msg = f"Project name exceeds maximum length of {max_length}" 

78 raise ValueError(msg) 

79 

80 

81def _check_project_name_chars( 

82 name: str, allow_hyphen: bool, allow_underscore: bool 

83) -> None: 

84 """Check project name characters. 

85 

86 Args: 

87 name: The project name. 

88 allow_hyphen: Whether to allow hyphens. 

89 allow_underscore: Whether to allow underscores. 

90 

91 Raises: 

92 ValueError: If name contains invalid characters. 

93 

94 """ 

95 # Build allowed characters 

96 allowed = r"a-zA-Z0-9" 

97 if allow_hyphen: 

98 allowed += r"-" 

99 if allow_underscore: 

100 allowed += r"_" 

101 

102 pattern = rf"^[a-zA-Z][{allowed}]*\Z" 

103 

104 if not re.match(pattern, name): 

105 if not name[0].isalpha(): 

106 msg = "Project name must start with a letter" 

107 raise ValueError(msg) 

108 hyphen_msg = ", hyphens" if allow_hyphen else "" 

109 underscore_msg = ", underscores" if allow_underscore else "" 

110 msg = ( 

111 f"Project name contains invalid characters. " 

112 f"Allowed: letters, numbers{hyphen_msg}{underscore_msg}" 

113 ) 

114 raise ValueError(msg) 

115 

116 

def _check_project_name_reserved(name: str) -> None:
    """Reject project names that appear in ``PROJECT_NAME_RESERVED``.

    The comparison is case-insensitive: the name is lower-cased before the
    lookup, while the error message shows it exactly as given.

    Args:
        name: The project name.

    Raises:
        ValueError: If the lower-cased name is reserved.

    """
    lowered = name.lower()
    if lowered not in PROJECT_NAME_RESERVED:
        return

    msg = f"Project name '{name}' is reserved"
    raise ValueError(msg)

130 

131 

def validate_project_name(
    name: str,
    *,
    max_length: int = 100,
    allow_hyphen: bool = True,
    allow_underscore: bool = True,
) -> str:
    """Validate a project name and return it unchanged.

    The checks run in a fixed order and the first failure is raised:
    type, length, permitted characters, reserved names.

    Args:
        name: The project name to validate.
        max_length: Maximum allowed length.
        allow_hyphen: Allow hyphens in name.
        allow_underscore: Allow underscores in name.

    Returns:
        The validated project name.

    Raises:
        TypeError: If ``name`` is not a string.
        ValueError: If the name is empty, too long, contains characters
            that are not allowed, or is reserved.

    Example:
        >>> validate_project_name("my_project")
        'my_project'
        >>> validate_project_name("123project")
        Traceback (most recent call last):
            ...
        ValueError: Project name must start with a letter

    """
    _validate_type(value=name, expected_type=str, name="Project name")
    _check_project_name_length(name, max_length=max_length)
    _check_project_name_chars(
        name,
        allow_hyphen=allow_hyphen,
        allow_underscore=allow_underscore,
    )
    _check_project_name_reserved(name)
    return name

166 

167 

def validate_python_version(version: str) -> str:
    """Validate a Python version string of the form ``X.Y``.

    Args:
        version: Version string like "3.12" or "3.10".

    Returns:
        The validated version string, unchanged.

    Raises:
        TypeError: If ``version`` is not a string.
        ValueError: If the format is not ``X.Y`` with ASCII digits, the
            major version is not 3, or the minor version is below the
            supported minimum.

    """
    _validate_type(version, str, "Version")

    # [0-9] rather than \d: on str patterns \d also matches non-ASCII
    # decimal digits, which int() would happily convert, letting a string
    # such as "٣.١٢" pass validation and be returned to the caller.
    if not re.fullmatch(r"[0-9]+\.[0-9]+", version):
        msg = f"Invalid version format: '{version}'. Use 'X.Y' format (e.g., '3.12')"
        raise ValueError(msg)

    try:
        major, minor = map(int, version.split("."))
    except ValueError as e:
        # NOTE(review): after the format check this should only trigger when
        # int() itself refuses the text (e.g. an extremely long digit run
        # hitting the interpreter's int/str conversion limit) - confirm.
        msg = f"Invalid version numbers in '{version}'"
        raise ValueError(msg) from e

    if major != PYTHON_MAJOR_VERSION:
        msg = f"Only Python 3.x is supported, got {major}.x"
        raise ValueError(msg)

    if minor < MIN_PYTHON_MINOR_VERSION:
        msg = (
            f"Python 3.{minor} is not supported. "
            f"Minimum is 3.{MIN_PYTHON_MINOR_VERSION}"
        )
        raise ValueError(msg)

    return version

207 

208 

def validate_email(email: str) -> str:
    """Validate the format of an email address and return it unchanged.

    A deliberately permissive regular expression is applied first; the
    lengths of the local part and of the domain are then checked
    separately.

    Args:
        email: The email address to validate.

    Returns:
        The validated email address.

    Raises:
        TypeError: If ``email`` is not a string.
        ValueError: If the address is empty, does not match the expected
            format, or one of its parts is too long.

    """
    _validate_type(email, str, "Email")

    if not email:
        msg = "Email cannot be empty"
        raise ValueError(msg)

    # RFC 5322 compliant pattern (simplified)
    email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\Z"
    if re.match(email_pattern, email) is None:
        msg = f"Invalid email format: {email}"
        raise ValueError(msg)

    # The pattern admits exactly one "@", so splitting at the last one
    # separates the local part from the domain.
    local_part, _, domain_part = email.rpartition("@")

    if len(local_part) > MAX_EMAIL_LOCAL_LENGTH:
        msg = f"Email local part exceeds {MAX_EMAIL_LOCAL_LENGTH} characters"
        raise ValueError(msg)

    if len(domain_part) > MAX_EMAIL_DOMAIN_LENGTH:
        msg = f"Email domain exceeds {MAX_EMAIL_DOMAIN_LENGTH} characters"
        raise ValueError(msg)

    return email

250 

251 

def validate_url(
    url: str,
    *,
    allowed_schemes: tuple[str, ...] = ("http", "https"),
    require_tld: bool = True,
) -> str:
    """Validate URL format, scheme and host.

    Args:
        url: The URL to validate.
        allowed_schemes: Tuple of allowed URL schemes.
        require_tld: Whether to require a TLD in the host name. Hosts
            listed in ``LOCALHOST_DOMAINS`` are exempt from this rule.

    Returns:
        The validated URL, unchanged.

    Raises:
        TypeError: If ``url`` is not a string.
        ValueError: If the URL is empty, cannot be parsed, has a missing
            or disallowed scheme, has no host, or (when ``require_tld`` is
            set) its host has no TLD.

    """
    _validate_type(url, str, "URL")

    if not url:
        msg = "URL cannot be empty"
        raise ValueError(msg)

    try:
        parsed = urlparse(url)
        # Accessing .port forces the port to be parsed, so an invalid port
        # surfaces here as ValueError instead of later in a caller.
        _ = parsed.port
        # Use the parsed host rather than slicing netloc by hand: netloc may
        # carry "user:password@" userinfo and bracketed IPv6 literals, both
        # of which break a naive split on ":".
        hostname = parsed.hostname
    except ValueError as e:
        msg = f"Invalid URL format: {e}"
        raise ValueError(msg) from e

    if not parsed.scheme:
        msg = "URL must have a scheme (e.g., https://)"
        raise ValueError(msg)

    if parsed.scheme not in allowed_schemes:
        msg = f"URL scheme '{parsed.scheme}' is not allowed. Allowed: {allowed_schemes}"
        raise ValueError(msg)

    # A netloc made only of userinfo and/or a port (e.g. "http://:80") has
    # no host and is rejected as well.
    if not parsed.netloc or not hostname:
        msg = "URL must have a domain"
        raise ValueError(msg)

    if require_tld:
        # Check for TLD (at least one dot, and not a trailing one).
        # urlparse documents .hostname as lower case, so it can be compared
        # with LOCALHOST_DOMAINS directly.
        domain = hostname
        has_no_tld = "." not in domain or domain.endswith(".")
        is_localhost = domain in LOCALHOST_DOMAINS
        if has_no_tld and not is_localhost:
            msg = f"URL domain must have a TLD: {domain}"
            raise ValueError(msg)

    return url