@lru_cache(maxsize=1)
def read_raw_lines(file_name: str) -> List[str]:
    """Return the lines of ``file_name`` as produced by ``file.readlines()``.

    Line terminators are kept. The result for the most recently requested
    path is memoised (``maxsize=1``), so repeated calls for the same file do
    not reread it. The cached list object is handed back on every hit, so
    callers must treat it as read-only.

    :param file_name: path of the file to read.
    :returns: the file's lines, or an empty list when the file cannot be
        opened or its content cannot be decoded as text.
    """
    try:
        with open(file_name) as f:
            return f.readlines()
    except (OSError, UnicodeDecodeError):
        # OSError is what IOError aliases in Python 3. UnicodeDecodeError is a
        # ValueError, so it must be listed separately: readlines() raises it
        # for binary / non-decodable content, and this helper is expected to
        # degrade to "no raw lines" rather than abort the caller.
        log.debug(f"Can't open file {file_name}")
        return []
for line_number, line in lines: @@ -327,25 +340,33 @@ def _process_line_based_plugins( ): continue - yield from ( - secret - for plugin in get_plugins() + for plugin in get_plugins(): for secret in _scan_line( - plugin=plugin, - filename=filename, - line=line, - line_number=line_number, - context=code_snippet, - ) - if not _is_filtered_out( - required_filter_parameters=['context'], - filename=secret.filename, - secret=secret.secret_value, - plugin=plugin, - line=line, - context=code_snippet, - ) - ) + plugin=plugin, + filename=filename, + line=line, + line_number=line_number, + context=code_snippet, + ): + if not _is_filtered_out( + required_filter_parameters=['context'], + filename=secret.filename, + secret=secret.secret_value, + plugin=plugin, + line=line, + context=code_snippet, + ): + if determine_file_type(filename) == FileType.YAML and secret.secret_value: + # YAML specifically has multi-line string parsing that groups the + # different lines as 1. + # Calculate actual line number in case of YAML multi-line string + actual_line_number = line_number + for i, l in enumerate(raw_code_snippet_lines[actual_line_number - 1:]): + if secret.secret_value in l: + actual_line_number += i + break + secret.line_number = actual_line_number + yield secret def _scan_line( diff --git a/test_data/scan_test_multiline.yaml b/test_data/scan_test_multiline.yaml new file mode 100644 index 000000000..ad57eb1b5 --- /dev/null +++ b/test_data/scan_test_multiline.yaml @@ -0,0 +1,13 @@ +configuration: + datadogAgent: + enabled: true + name: name + +image: + repository: gcr.io/some/scheduler + pullPolicy: Always + +schedule: "* * * * *" + +cmdList: "curl --retry-connrefused --retry 3 --retry-delay 5 -X POST http://someone:someone@my.cluster.local/v1/?event=EXPIRE_SYNC&days=1|1440 \ + ||curl --retry-connrefused --retry 3 --retry-delay 5 -X POST http://anotherone:anotherone@my.cluster.local/v1/?hours=1|60" diff --git a/tests/core/scan_test.py b/tests/core/scan_test.py index 
@staticmethod
def test_multi_line_results_accuracy():
    """Secrets found in the multi-line YAML fixture must not share a line number."""
    file_name = 'test_data/scan_test_multiline.yaml'
    findings = list(scan.scan_file(file_name))
    assert len(findings) > 0, f'Expected to find secrets in {file_name}'

    # A line number that shows up twice means two findings were reported on
    # the same line, which is exactly what this test guards against.
    reported_lines = [finding.line_number for finding in findings]
    assert len(reported_lines) == len(set(reported_lines)),\
        'Found multiple secrets on the same line number'