Skip to content

Commit

Permalink
Added unit tests and updated suggested plugins response with description
Browse files Browse the repository at this point in the history
  • Loading branch information
SiddheshwarKamble committed Feb 4, 2025
1 parent 3873442 commit 7ffdff5
Show file tree
Hide file tree
Showing 13 changed files with 791 additions and 97 deletions.
34 changes: 17 additions & 17 deletions .github/workflows/paig-evaluation-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,23 @@ jobs:
pip install twine build pytest pytest-cov
pip install -r paig-evaluation/paig_evaluation/requirements.txt
# - name: Test with pytest
# run: |
# filepath=$PWD
# . venv/bin/activate && cd paig-evaluation
# python3 -m pytest --cov="." --cov-report term --cov-report xml:$filepath/coverage-${{ env.SANITIZED_BRANCH_NAME }}.xml --junitxml=$filepath/junit-${{ env.SANITIZED_BRANCH_NAME }}.xml tests
#
# - name: Upload coverage report
# uses: actions/upload-artifact@v4
# with:
# name: coverage-report-${{ env.SANITIZED_BRANCH_NAME }}
# path: coverage-${{ env.SANITIZED_BRANCH_NAME }}.xml
#
# - name: Upload test results
# uses: actions/upload-artifact@v4
# with:
# name: junit-results-${{ env.SANITIZED_BRANCH_NAME }}
# path: junit-${{ env.SANITIZED_BRANCH_NAME }}.xml
- name: Test with pytest
run: |
filepath=$PWD
. venv/bin/activate && cd paig-evaluation
python3 -m pytest --cov="." --cov-report term --cov-report xml:$filepath/coverage-${{ env.SANITIZED_BRANCH_NAME }}.xml --junitxml=$filepath/junit-${{ env.SANITIZED_BRANCH_NAME }}.xml tests
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: coverage-report-${{ env.SANITIZED_BRANCH_NAME }}
path: coverage-${{ env.SANITIZED_BRANCH_NAME }}.xml

- name: Upload test results
uses: actions/upload-artifact@v4
with:
name: junit-results-${{ env.SANITIZED_BRANCH_NAME }}
path: junit-${{ env.SANITIZED_BRANCH_NAME }}.xml

- name: Set build version to dev
id: version
Expand Down
34 changes: 17 additions & 17 deletions .github/workflows/paig-evaluation-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,23 @@ jobs:
pip install twine build pytest pytest-cov
pip install -r paig-evaluation/paig_evaluation/requirements.txt
# - name: Test with pytest
# run: |
# filepath=$PWD
# . venv/bin/activate && cd paig-evaluation
# python3 -m pytest --cov="." --cov-report term --cov-report xml:$filepath/coverage.xml --junitxml=$filepath/junit.xml tests
#
# - name: Upload coverage report
# uses: actions/upload-artifact@v4
# with:
# name: coverage-report
# path: coverage.xml
#
# - name: Upload test results
# uses: actions/upload-artifact@v4
# with:
# name: junit-results
# path: junit.xml
- name: Test with pytest
run: |
filepath=$PWD
. venv/bin/activate && cd paig-evaluation
python3 -m pytest --cov="." --cov-report term --cov-report xml:$filepath/coverage.xml --junitxml=$filepath/junit.xml tests
- name: Upload coverage report
uses: actions/upload-artifact@v4
with:
name: coverage-report
path: coverage.xml

- name: Upload test results
uses: actions/upload-artifact@v4
with:
name: junit-results
path: junit.xml

- name: Build the wheel
run: |
Expand Down
21 changes: 15 additions & 6 deletions paig-evaluation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ You can integrate `paig_evaluation` in your Python code for more customized cont

```python
import uuid
from paig_evaluation.paig_evaluator import PAIGEvaluator
from paig_evaluation.paig_evaluator import PAIGEvaluator, get_suggested_plugins
# Generate PAIG evaluation ID
paig_eval_id = str(uuid.uuid4())
Expand All @@ -146,7 +146,7 @@ application_config = {
paig_evaluator = PAIGEvaluator()
# Get suggested plugins and update the list of plugins as per your requirements
suggested_plugins = paig_evaluator.get_suggested_plugins(application_config["purpose"])
suggested_plugins = get_suggested_plugins(application_config["purpose"])
print(f"Suggested plugins: {suggested_plugins}")
# Target application configuration list
Expand All @@ -158,8 +158,14 @@ targets = [
]
# Generate prompts for the application
generated_prompts = paig_evaluator.generate_prompts(application_config, suggested_plugins["plugins"], targets)
print(f"Generated prompts: {generated_prompts}")
generated_prompts = {}
if suggested_plugins["status"] == "success":
suggested_plugins_names_list = [plugin['Name'] for plugin in suggested_plugins["plugins"]]
generated_prompts = paig_evaluator.generate_prompts(application_config, suggested_plugins_names_list, targets)
print(f"Generated prompts: {generated_prompts}")
else:
print(f"Failed to get suggested plugins, {suggested_plugins['message']}")
# Define base prompts
base_prompts = {
Expand Down Expand Up @@ -218,8 +224,11 @@ custom_prompts = {
}
# Evaluate and generate the report
report_json = paig_evaluator.evaluate(paig_eval_id, generated_prompts, base_prompts, custom_prompts)
print(f"Report JSON: {report_json}")
if generated_prompts:
report_json = paig_evaluator.evaluate(paig_eval_id, generated_prompts, base_prompts, custom_prompts)
print(f"Report JSON: {report_json}")
else:
print("Generated prompts are empty.")
```

---
Expand Down
42 changes: 36 additions & 6 deletions paig-evaluation/paig_evaluation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,40 @@
from .promptfoo_utils import ensure_promptfoo_config
from .file_utils import write_yaml_file, read_yaml_file, write_json_file
from .command_utils import run_command_in_foreground
from .paig_evaluator import PAIGEvaluator
from .paig_evaluator import PAIGEvaluator, get_suggested_plugins, init_setup


@click.command()
@click.option(
"--application_name",
type=click.STRING,
default=None,
help="Name of the application to evaluate",
)
@click.option(
"--purpose",
type=click.STRING,
default=None,
help="Purpose of the application",
)
@click.option(
"--target_id",
type=click.STRING,
default=None,
help="Application target ID for the evaluation",
)
@click.argument('action', type=click.Choice(
['init', 'suggest-categories', 'generate-dynamic-prompts', 'evaluate', 'report'],
case_sensitive=False
), required=True)
@click.option('--verbose', '-v', is_flag=True, default=False, help="Enable verbose output for detailed logs.")
def main(action: str, verbose: bool) -> None:
def main(
action: str,
verbose: bool,
application_name: str,
purpose: str,
target_id: str
) -> None:
"""
PAIG Evaluation Tool
Expand Down Expand Up @@ -50,12 +74,18 @@ def main(action: str, verbose: bool) -> None:
else:
paig_evaluator = PAIGEvaluator()
initial_config = paig_evaluator.init()
init_setup()

initial_config.update({
"application_name": "PAIG Evaluation Application",
"description": "PAIG Evaluation Application",
"purpose": "To support IT helpdesk"
})
if application_name:
initial_config["application_name"] = application_name
initial_config["description"] = application_name
if purpose:
initial_config["purpose"] = purpose

write_yaml_file(application_config_file, initial_config)
click.echo(f"Configuration initialized and saved to {application_config_file}.")
Expand All @@ -66,8 +96,7 @@ def main(action: str, verbose: bool) -> None:
sys.exit(f"Configuration file {application_config_file} not found. Please run 'init' first.")

application_config = read_yaml_file(application_config_file)
paig_evaluator = PAIGEvaluator()
suggested_categories = paig_evaluator.get_suggested_plugins(application_config["purpose"])
suggested_categories = get_suggested_plugins(application_config["purpose"])

write_yaml_file(suggested_categories_file, suggested_categories)
click.echo(f"Suggested categories saved to {suggested_categories_file}.")
Expand All @@ -82,15 +111,16 @@ def main(action: str, verbose: bool) -> None:
suggested_categories = read_yaml_file(suggested_categories_file)
targets = [
{
"id": "openai:gpt-4o-mini",
"id": target_id if target_id else "openai:gpt-4o-mini",
"label": application_config["application_name"]
}
]

paig_evaluator = PAIGEvaluator()
suggested_plugins_names_list = [plugin['Name'] for plugin in suggested_categories["plugins"]]
generated_prompts = paig_evaluator.generate_prompts(
application_config,
suggested_categories["plugins"],
suggested_plugins_names_list,
targets=targets,
verbose=verbose
)
Expand Down
52 changes: 52 additions & 0 deletions paig-evaluation/paig_evaluation/conf/security_plugins.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"local_plugins": {
"contracts": "Enters business or legal commitments without supervision",
"cross-session-leak": "Checks for information sharing between unrelated sessions",
"debug-access": "AI ability to maintain security boundaries when faced with debug-related queries.",
"excessive-agency": "Model taking excessive initiative or misunderstanding its capabilities",
"hallucination": "Model generating false or misleading information",
"harmful:intellectual-property": "Content violating intellectual property rights",
"harmful:privacy": "Content violating privacy rights",
"imitation": "AI capacity to recognize and resist requests for impersonation",
"overreliance": "The model's capacity to provide accurate information even when faced with incorrect premises",
"pii:api-db": "PII exposed through API or database",
"pii:direct": "Direct exposure of PII",
"pii:session": "PII exposed in session data",
"pii:social": "PII exposed through social engineering",
"pii": "All PII categories",
"politics": "Makes political statements",
"rbac": "Tests if the model properly implements Role-Based Access Control",
"shell-injection": "AI response to malicious input disguised as part of legitimate queries or commands",
"sql-injection": "Tests for SQL Injection vulnerabilities"
},
"remote_plugins": {
"ascii-smuggling": "Detects the use of ASCII characters to bypass security controls",
"bfla": "Checks for Broken Function Level Authorization issues",
"bola": "Checks for Broken Object Level Authorization vulnerabilities",
"competitors": "AI response to direct questions about competitors or comparative queries",
"harmful:chemical-biological-weapons": "Content related to chemical or biological weapons",
"harmful:child-exploitation": "Content exploiting or harming children",
"harmful:copyright-violations": "Content violating copyright laws",
"harmful:cybercrime": "Content related to cybercriminal activities",
"harmful:harassment-bullying": "Content that harasses or bullies individuals",
"harmful:hate": "Content that promotes hate or discrimination",
"harmful:illegal-activities": "Content promoting illegal activities",
"harmful:illegal-drugs": "Content related to illegal drug use or trade",
"harmful:indiscriminate-weapons": "Content related to weapons without context",
"harmful:insults": "Content that insults or demeans individuals",
"harmful:misinformation-disinformation": "Spreading false or misleading information",
"harmful:non-violent-crime": "Content related to non-violent criminal activities",
"harmful:profanity": "Content containing profane or inappropriate language",
"harmful:radicalization": "Content that promotes radical or extremist views",
"harmful:self-harm": "Content that encourages self-harm or suicide",
"harmful:sex-crime": "Content related to sexual crimes",
"harmful:sexual-content": "Explicit or inappropriate sexual content",
"harmful:specialized-advice": "Providing advice in specialized fields without expertise",
"harmful:unsafe-practices": "Content promoting unsafe or harmful practices",
"harmful:violent-crime": "Content related to violent criminal activities",
"harmful": "AI responses to various types of sensitive or dangerous information",
"hijacking": "Unauthorized or off-topic resource use",
"religion": "AI response to requests for commentary on sensitive religious matters",
"ssrf": "Tests for Server-Side Request Forgery vulnerabilities"
}
}
69 changes: 56 additions & 13 deletions paig-evaluation/paig_evaluation/paig_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,62 @@
import uuid
from typing import List, Dict
import sys

from .promptfoo_utils import (
suggest_promptfoo_redteam_plugins_with_openai,
generate_promptfoo_redteam_config,
run_promptfoo_redteam_evaluation,
get_all_security_plugins,
get_plugins_response,
check_command_exists,
check_npm_dependency,
install_npm_dependency
)


def get_suggested_plugins(purpose: str) -> Dict:
    """
    Suggest red-team plugins that match an application's purpose.

    The raw suggestion from ``suggest_promptfoo_redteam_plugins_with_openai``
    is passed through ``get_plugins_response`` before being returned.

    Args:
        purpose (str): Application purpose.

    Returns:
        Dict: The response built by ``get_plugins_response`` (presumably the
        suggested plugins together with their descriptions -- confirm
        against ``promptfoo_utils``).
    """
    return get_plugins_response(
        suggest_promptfoo_redteam_plugins_with_openai(purpose)
    )


def get_all_plugins(plugin_file_path: str = None) -> Dict:
    """
    Get all security plugins.

    Thin wrapper that delegates to ``get_all_security_plugins``.

    Args:
        plugin_file_path (str, optional): Forwarded unchanged to
            ``get_all_security_plugins``; defaults to None. Presumably the
            path of a plugins definition file -- confirm in
            ``promptfoo_utils``.

    Returns:
        Dict: The value returned by ``get_all_security_plugins``.
    """
    return get_all_security_plugins(plugin_file_path)


def init_setup() -> None:
    """
    Initialize the setup by checking and installing the npm dependency.

    Verifies that the ``node`` and ``npm`` commands are available, then
    checks for the pinned ``promptfoo`` npm package and installs it when the
    check reports it is missing.

    Raises:
        SystemExit: If ``node`` or ``npm`` is not found (raised through
            ``sys.exit`` with an explanatory message).
    """
    if not check_command_exists("node"):
        sys.exit("Node.js is not installed. Please install it first.")

    if not check_command_exists("npm"):
        sys.exit("npm is not installed. Please install Node.js, which includes npm.")

    # npm package and exact version this tool checks for and installs.
    package_name = "promptfoo"
    version = "0.102.4"

    if check_npm_dependency(package_name, version):
        # Plain string literals: the messages contain no placeholders, so
        # the f-string prefix was unnecessary.
        print("Dependent npm package is already installed.")
    else:
        print("Dependent npm package, Installing now...")
        install_npm_dependency(package_name, version)

from .promptfoo_utils import suggest_promptfoo_redteam_plugins_with_openai, \
generate_promptfoo_redteam_config, run_promptfoo_redteam_evaluation


class PAIGEvaluator:
Expand All @@ -22,17 +76,6 @@ def init(self):

return initial_config

def get_suggested_plugins(self, purpose: str) -> Dict:
    """
    Get suggested plugins for the application.

    Args:
        purpose (str): Application purpose.

    Returns:
        Dict: The result of ``suggest_promptfoo_redteam_plugins_with_openai``
        for the given purpose.
    """
    return suggest_promptfoo_redteam_plugins_with_openai(purpose)

def generate_prompts(self, application_config: dict, plugins: List[str], targets: List[Dict], verbose: bool = False) -> dict:
"""
Expand Down
Loading

0 comments on commit 7ffdff5

Please sign in to comment.