#!/usr/bin/env python3
"""
Test that Jinja2 expressions within YAML files are valid.
This catches issues like inline comments in Jinja2 expressions within YAML task files.
"""

import functools
import re
from pathlib import Path

import pytest
import yaml
from jinja2 import Environment, StrictUndefined, TemplateSyntaxError

# Message returned (and matched on by callers/tests) when a "#" comment is
# found inside a Jinja2 expression.
_INLINE_COMMENT_ERROR = "Inline comment (#) found in Jinja2 expression - comments must be outside expressions"

# A single- or double-quoted string literal, honouring backslash escapes.
# Matching both quote styles in ONE left-to-right pass means a quote character
# of the other kind inside a string can never be mistaken for a delimiter.
_STRING_LITERAL_RE = re.compile(
    r"'(?:\\.|[^'\\])*'" r'|"(?:\\.|[^"\\])*"',
    re.DOTALL,
)

# A "#" that is not escaped with a preceding backslash.
_UNESCAPED_HASH_RE = re.compile(r"(?<!\\)#")

# Substrings of Jinja2 compile-time errors that only mean "this name is not
# registered in the mock environment" rather than "the syntax is wrong".
_LENIENT_SYNTAX_MESSAGES = ("no filter named", "no test named")

# Substrings of runtime errors that are ignored during validation.
_LENIENT_RUNTIME_MESSAGES = ("undefined", "has no attribute", "no filter")


def find_yaml_files_with_jinja2():
    """Find all YAML files that might contain Jinja2 expressions.

    Globs relative to the current working directory and drops any path that
    has a component listed in ``skip_dirs``.

    Returns:
        A sorted list of ``pathlib.Path`` objects.
    """
    # Look for YAML files in roles that are likely to have Jinja2
    patterns = [
        "roles/**/tasks/*.yml",
        "roles/**/defaults/*.yml",
        "roles/**/vars/*.yml",
        "playbooks/*.yml",
        "*.yml",
    ]
    skip_dirs = {".git", ".venv", "venv", ".env", "configs"}

    yaml_files = [
        path
        for pattern in patterns
        for path in Path(".").glob(pattern)
        if skip_dirs.isdisjoint(path.parts)
    ]
    return sorted(yaml_files)


def extract_jinja2_expressions(content):
    """Extract all Jinja2 expressions from text content.

    Args:
        content: The text to scan.

    Returns:
        A list of dicts with keys ``type`` ("variable" or "control"),
        ``content`` (text between the delimiters), ``full`` (text including
        the delimiters), ``start`` and ``end`` (offsets into *content*).
        All ``{{ ... }}`` matches come first, followed by all ``{% ... %}``
        matches; the list is not sorted by position.
    """
    expression_patterns = (
        # {{ ... }} expressions (variable interpolations)
        ("variable", r"\{\{(.+?)\}\}"),
        # {% ... %} expressions (control structures)
        ("control", r"\{%(.+?)%\}"),
    )

    expressions = []
    for expr_type, pattern in expression_patterns:
        for match in re.finditer(pattern, content, re.DOTALL):
            expressions.append(
                {
                    "type": expr_type,
                    "content": match.group(1),
                    "full": match.group(0),
                    "start": match.start(),
                    "end": match.end(),
                }
            )
    return expressions


def find_line_number(content, position):
    """Return the 1-based line number of offset *position* within *content*."""
    return content[:position].count("\n") + 1


def _has_inline_comment(content):
    """Return True if *content* has an unescaped ``#`` outside string literals."""
    if "#" not in content:
        return False
    # Blank out string literals first so a "#" inside quotes is not reported.
    cleaned = _STRING_LITERAL_RE.sub('""', content)
    return _UNESCAPED_HASH_RE.search(cleaned) is not None


@functools.lru_cache(maxsize=1)
def _get_environment():
    """Build (once) the Jinja2 environment with mocked Ansible filters/tests."""
    env = Environment(undefined=StrictUndefined)

    # Add common Ansible filters (expanded list)
    env.filters["bool"] = lambda x: bool(x)
    env.filters["default"] = lambda x, d="": x if x else d
    env.filters["to_uuid"] = lambda x: "mock-uuid"
    env.filters["b64encode"] = lambda x: "mock-base64"
    env.filters["b64decode"] = lambda x: "mock-decoded"
    env.filters["version"] = lambda x, op: True
    env.filters["ternary"] = lambda x, y, z=None: y if x else (z if z is not None else "")
    env.filters["regex_replace"] = lambda x, p, r: x
    env.filters["difference"] = lambda x, y: list(set(x) - set(y))
    env.filters["strftime"] = lambda fmt, ts: "mock-timestamp"
    env.filters["int"] = lambda x: int(x) if x else 0
    env.filters["list"] = lambda x: list(x)
    env.filters["map"] = lambda x, *args: x
    env.tests["version"] = lambda x, op: True
    return env


def validate_jinja2_expression(expression, context_vars=None):
    """
    Validate a single Jinja2 expression.

    Args:
        expression: Dict with at least ``type`` ("variable" selects ``{{ }}``
            delimiters, anything else selects ``{% %}``) and ``content`` (the
            text between the delimiters).
        context_vars: Variables used when rendering; defaults to
            ``get_test_variables()``.

    Returns (is_valid, error_message)
    """
    if context_vars is None:
        context_vars = get_test_variables()

    # First check for inline comments - this is the main issue we want to catch
    if _has_inline_comment(expression["content"]):
        return False, _INLINE_COMMENT_ERROR

    try:
        # Wrap the expression in appropriate delimiters for parsing
        if expression["type"] == "variable":
            template_str = "{{" + expression["content"] + "}}"
        else:
            template_str = "{%" + expression["content"] + "%}"

        # Compile, then render with test variables to surface runtime errors
        template = _get_environment().from_string(template_str)
        template.render(**context_vars)
        return True, None

    except TemplateSyntaxError as e:
        # Unknown filters/tests are reported by Jinja2 at compile time as
        # TemplateAssertionError, a subclass of TemplateSyntaxError, so they
        # arrive here and never reach the generic handler below. They only
        # mean the mock environment lacks that name, so stay lenient.
        message = (e.message or "").lower()
        if any(ignore in message for ignore in _LENIENT_SYNTAX_MESSAGES):
            return True, None
        return False, f"Syntax error: {e.message}"

    except Exception as e:
        # Be lenient - we mainly care about inline comments and basic syntax
        # Ignore runtime errors (undefined vars, missing attributes, etc.)
        error_str = str(e).lower()
        if any(ignore in error_str for ignore in _LENIENT_RUNTIME_MESSAGES):
            return True, None  # These are runtime issues, not syntax issues
        return False, f"Error: {str(e)}"


def get_test_variables():
    """Get a comprehensive set of test variables for expression validation."""
    return {
        # Network configuration
        "IP_subject_alt_name": "10.0.0.1",
        "server_name": "algo-vpn",
        "wireguard_port": 51820,
        "wireguard_network": "10.19.49.0/24",
        "wireguard_network_ipv6": "fd9d:bc11:4021::/64",
        "strongswan_network": "10.19.48.0/24",
        "strongswan_network_ipv6": "fd9d:bc11:4020::/64",
        # Feature flags
        "ipv6_support": True,
        "dns_encryption": True,
        "dns_adblocking": True,
        "wireguard_enabled": True,
        "ipsec_enabled": True,
        # OpenSSL/PKI
        "openssl_constraint_random_id": "test-uuid-12345",
        "CA_password": "test-password",
        "p12_export_password": "test-p12-password",
        "ipsec_pki_path": "/etc/ipsec.d",
        "ipsec_config_path": "/etc/ipsec.d",
        "subjectAltName": "IP:10.0.0.1,DNS:vpn.example.com",
        "subjectAltName_type": "IP",
        # Ansible variables
        "ansible_default_ipv4": {"address": "10.0.0.1"},
        "ansible_default_ipv6": {"address": "2600:3c01::f03c:91ff:fedf:3b2a"},
        "ansible_distribution": "Ubuntu",
        "ansible_distribution_version": "22.04",
        "ansible_date_time": {"epoch": "1234567890"},
        # User management
        "users": ["alice", "bob", "charlie"],
        "all_users": ["alice", "bob", "charlie", "david"],
        # Common variables
        "item": "test-item",
        "algo_provider": "local",
        "algo_server_name": "algo-vpn",
        "dns_servers": ["1.1.1.1", "1.0.0.1"],
        # OpenSSL version for conditionals
        "openssl_version": "3.0.0",
        # IPsec configuration
        "certificate_validity_days": 3650,
        "ike_cipher": "aes128gcm16-prfsha512-ecp256",
        "esp_cipher": "aes128gcm16-ecp256",
    }


def validate_yaml_file(yaml_path, check_inline_comments_only=False):
    """
    Validate all Jinja2 expressions in a YAML file.

    Args:
        yaml_path: Path of the file to check.
        check_inline_comments_only: When true, only inline-comment errors are
            collected; other validation errors and file-read errors are dropped.

    Returns (has_inline_comments, list_of_inline_comment_errors, list_of_other_errors)
    """
    inline_comment_errors = []
    other_errors = []

    try:
        with open(yaml_path, encoding="utf-8") as f:
            content = f.read()

        # First, check if it's valid YAML
        try:
            yaml.safe_load(content)
        except yaml.YAMLError:
            # YAML syntax error, not our concern here
            return False, [], []

        # Extract all Jinja2 expressions
        expressions = extract_jinja2_expressions(content)
        if not expressions:
            return False, [], []  # No Jinja2 expressions to validate

        # Build the render context once instead of once per expression
        context_vars = get_test_variables()

        # Validate each expression
        for expr in expressions:
            is_valid, error = validate_jinja2_expression(expr, context_vars)
            if is_valid:
                continue

            line_num = find_line_number(content, expr["start"])
            error_msg = f"{yaml_path}:{line_num}: {error}"

            # Separate inline comment errors from other errors
            if error and "inline comment" in error.lower():
                inline_comment_errors.append(error_msg)
                # Show context for inline comment errors
                if len(expr["full"]) < 200:
                    inline_comment_errors.append(f" Expression: {expr['full'][:100]}...")
            elif not check_inline_comments_only:
                other_errors.append(error_msg)

    except Exception as e:
        # Deliberately broad: an unreadable file must not abort the whole scan
        if not check_inline_comments_only:
            other_errors.append(f"{yaml_path}: Error reading file: {e}")

    return len(inline_comment_errors) > 0, inline_comment_errors, other_errors


def test_regression_openssl_inline_comments():
    """
    Regression test for the specific OpenSSL inline comment bug that was reported.
    Tests that we correctly detect inline comments in the exact pattern that caused the issue.
    """
    # The problematic expression that was reported
    problematic_expr = (
        "{{ [ subjectAltName_type + ':' + IP_subject_alt_name + "
        "('/255.255.255.255' if subjectAltName_type == 'IP' else ''), "
        "'DNS:' + openssl_constraint_random_id, "
        "# Per-deployment UUID prevents cross-deployment reuse "
        "'email:' + openssl_constraint_random_id "
        "# Unique email domain isolates certificate scope "
        "] + ( ['IP:' + ansible_default_ipv6['address'] + '/128'] if ipv6_support else [] ) }}"
    )

    # The fixed expression (without inline comments)
    fixed_expr = (
        "{{ [ subjectAltName_type + ':' + IP_subject_alt_name + "
        "('/255.255.255.255' if subjectAltName_type == 'IP' else ''), "
        "'DNS:' + openssl_constraint_random_id, "
        "'email:' + openssl_constraint_random_id "
        "] + ( ['IP:' + ansible_default_ipv6['address'] + '/128'] if ipv6_support else [] ) }}"
    )

    # Test the problematic expression - should fail
    expr_with_comments = {
        "type": "variable",
        "content": problematic_expr[2:-2],  # Remove {{ }}
        "full": problematic_expr,
    }
    is_valid, error = validate_jinja2_expression(expr_with_comments)
    assert not is_valid, "Should have detected inline comments in problematic expression"
    assert "inline comment" in error.lower(), f"Expected inline comment error, got: {error}"

    # Test the fixed expression - should pass
    expr_fixed = {
        "type": "variable",
        "content": fixed_expr[2:-2],  # Remove {{ }}
        "full": fixed_expr,
    }
    is_valid, error = validate_jinja2_expression(expr_fixed)
    assert is_valid, f"Fixed expression should pass but got error: {error}"


def test_edge_cases_inline_comments():
    """
    Test various edge cases for inline comment detection.
    Ensures we correctly handle hashes in strings, escaped hashes, and various comment patterns.
    """
    test_cases = [
        # (expression, should_pass, description)
        ("{{ 'string with # hash' }}", True, "Hash in string should pass"),
        ('{{ "another # in string" }}', True, "Hash in double-quoted string should pass"),
        ("{{ var # comment }}", False, "Simple inline comment should fail"),
        ("{{ var1 + var2 # This is an inline comment }}", False, "Inline comment with text should fail"),
        (r"{{ '\#' + 'escaped hash' }}", True, "Escaped hash should pass"),
        (r"{{ 'it\'s # fine' }}", True, "Hash after escaped quote in string should pass"),
        ("{% if true # comment %}", False, "Comment in control block should fail"),
        ("{% for item in list # loop comment %}", False, "Comment in for loop should fail"),
        ("{{ {'key': 'value # not a comment'} }}", True, "Hash in dict string value should pass"),
        ("{{ url + '/#anchor' }}", True, "URL fragment should pass"),
        ("{{ '#FF0000' }}", True, "Hex color code should pass"),
        ("{{ var }} # comment outside", True, "Comment outside expression should pass"),
        (
            """{{ [
    'item1',  # comment here
    'item2'
] }}""",
            False,
            "Multi-line with inline comment should fail",
        ),
    ]

    for expr_str, should_pass, description in test_cases:
        stripped = expr_str.strip()

        # For the "comment outside" case, extract just the Jinja2 expression
        if "{{" in expr_str and "#" in expr_str and expr_str.index("#") > expr_str.index("}}"):
            # Comment is outside the expression - extract just the expression part
            match = re.search(r"(\{\{.+?\}\})", expr_str)
            if not match:
                continue
            actual_expr = match.group(1)
            expr_type = "variable"
            content = actual_expr[2:-2].strip()
        elif stripped.startswith("{{"):
            expr_type = "variable"
            content = stripped[2:-2]
            actual_expr = stripped
        elif stripped.startswith("{%"):
            expr_type = "control"
            content = stripped[2:-2]
            actual_expr = stripped
        else:
            continue

        expr = {"type": expr_type, "content": content, "full": actual_expr}
        is_valid, error = validate_jinja2_expression(expr)

        if should_pass:
            assert is_valid, f"{description}: {error}"
        else:
            assert not is_valid, f"{description}: Should have failed but passed"
            assert "inline comment" in (error or "").lower(), (
                f"{description}: Expected inline comment error, got: {error}"
            )


def test_yaml_files_no_inline_comments():
    """
    Test that all YAML files in the project don't contain inline comments in Jinja2 expressions.
    """
    yaml_files = find_yaml_files_with_jinja2()

    all_inline_comment_errors = []
    files_with_inline_comments = []

    for yaml_file in yaml_files:
        has_inline_comments, inline_errors, _ = validate_yaml_file(yaml_file, check_inline_comments_only=True)
        if has_inline_comments:
            files_with_inline_comments.append(str(yaml_file))
            all_inline_comment_errors.extend(inline_errors)

    # Assert no inline comments found
    assert not all_inline_comment_errors, (
        f"Found inline comments in {len(files_with_inline_comments)} files:\n"
        + "\n".join(all_inline_comment_errors[:10])  # Show first 10 errors
    )


def test_openssl_file_specifically():
    """
    Specifically test the OpenSSL file that had the original bug.
    """
    openssl_file = Path("roles/strongswan/tasks/openssl.yml")

    if not openssl_file.exists():
        pytest.skip(f"{openssl_file} not found")

    has_inline_comments, inline_errors, _ = validate_yaml_file(openssl_file)

    assert not has_inline_comments, f"Found inline comments in {openssl_file}:\n" + "\n".join(inline_errors)