Validation Schemas¶
Schemas for input validation, error handling, and data quality assurance.
ValidatedTicker¶
Validated ticker symbol with metadata and quality checks.
Schema Definition¶
Python
class ValidatedTicker(BaseModel):
    """Validated ticker symbol with metadata and quality checks."""

    symbol: str = Field(..., description="Validated ticker symbol")
    original_input: str = Field(..., description="Original user input")
    asset_class: Optional[str] = Field(None, description="Detected asset class")

    # Validation results
    is_valid: bool = Field(..., description="Whether ticker is valid")
    validation_source: str = Field(..., description="Source used for validation")

    # Market data
    exchange: Optional[str] = Field(None, description="Primary exchange")
    currency: Optional[str] = Field(None, description="Trading currency")
    market_cap: Optional[float] = Field(None, ge=0, description="Market capitalization")

    # Quality metrics (scores normalized to the closed interval [0, 1])
    data_availability: float = Field(..., ge=0.0, le=1.0, description="Data availability score")
    liquidity_score: float = Field(..., ge=0.0, le=1.0, description="Liquidity assessment")

    # Metadata
    company_name: Optional[str] = Field(None, description="Company/asset name")
    sector: Optional[str] = Field(None, description="Sector classification")
    industry: Optional[str] = Field(None, description="Industry classification")

    # Validation timestamp (naive local time — NOTE(review): confirm whether UTC is expected)
    validated_at: datetime = Field(default_factory=datetime.now)

    model_config = {
        "extra": "forbid",
        "str_strip_whitespace": True,
        # BUG FIX: Pydantic v2 spells this option "str_to_upper"; the previous
        # "str_upper" is not a recognized config key, so symbols were never
        # auto-uppercased by the model config.
        "str_to_upper": True  # Automatically uppercase ticker symbols
    }

    @field_validator('symbol')
    @classmethod
    def validate_ticker_format(cls, v: str) -> str:
        """Check the symbol is a plausible ticker and return it uppercased.

        Allows alphanumeric characters plus '-' and '.' separators; the
        10-character limit applies to the raw input, separators included.
        """
        # Guard against empty input so the caller gets a precise message
        # instead of the generic "alphanumeric" error.
        if not v:
            raise ValueError('Ticker symbol must not be empty')
        # Remove common separators and validate what remains.
        clean_symbol = v.replace('-', '').replace('.', '')
        if not clean_symbol.isalnum():
            raise ValueError('Ticker must contain only alphanumeric characters, hyphens, and dots')
        if len(v) > 10:
            raise ValueError('Ticker symbol too long')
        return v.upper()
Example¶
JSON
{
"symbol": "AAPL",
"original_input": "aapl",
"asset_class": "stock",
"is_valid": true,
"validation_source": "Yahoo Finance",
"exchange": "NASDAQ",
"currency": "USD",
"market_cap": 3000000000000,
"data_availability": 0.98,
"liquidity_score": 1.0,
"company_name": "Apple Inc.",
"sector": "Technology",
"industry": "Consumer Electronics",
"validated_at": "2025-01-15T10:30:00Z"
}
ValidationResult¶
Comprehensive validation result with error details and suggestions.
Schema Definition¶
Python
class ValidationError(BaseModel):
    """A single validation problem: location, message, classification, and optional fix.

    NOTE(review): this name shadows pydantic's own ``ValidationError`` when
    both are imported into the same namespace — verify call sites.
    """

    field_path: str = Field(..., description="Path to the field with error")
    message: str = Field(..., description="Human-readable error message")
    error_type: str = Field(..., description="Error type classification")
    input_value: Any = Field(..., description="The invalid input value")
    suggested_fix: Optional[str] = Field(None, description="Suggested correction")

    # Reject unknown fields, matching the other models in this module.
    model_config = {"extra": "forbid"}
class ValidationResult(BaseModel):
    """Comprehensive validation result with error details and suggestions."""

    is_valid: bool = Field(..., description="Overall validation status")
    # Naive local time — NOTE(review): confirm whether UTC is expected here.
    validation_timestamp: datetime = Field(default_factory=datetime.now)

    # Validation details
    schema_name: str = Field(..., description="Schema being validated against")
    schema_version: str = Field(default="1.0", description="Schema version")

    # Results: errors block acceptance, warnings do not.
    errors: List[ValidationError] = Field(default_factory=list)
    warnings: List[ValidationError] = Field(default_factory=list)

    # Processed data
    sanitized_data: Optional[Dict[str, Any]] = Field(None, description="Cleaned/sanitized data")
    original_data: Dict[str, Any] = Field(..., description="Original input data")

    # Quality metrics (normalized to [0, 1])
    data_completeness: float = Field(..., ge=0.0, le=1.0, description="Completeness score")
    data_quality_score: float = Field(..., ge=0.0, le=1.0, description="Overall quality score")

    # Validation context
    validation_mode: Literal["strict", "lenient", "warn_only"] = Field(default="strict")
    validation_rules_applied: List[str] = Field(default_factory=list)

    model_config = {"extra": "forbid"}

    @property
    def error_count(self) -> int:
        """Number of blocking errors recorded."""
        return len(self.errors)

    @property
    def warning_count(self) -> int:
        """Number of non-blocking warnings recorded."""
        return len(self.warnings)

    @property
    def has_errors(self) -> bool:
        """True when at least one blocking error was recorded."""
        return len(self.errors) > 0

    @property
    def has_warnings(self) -> bool:
        """True when at least one warning was recorded."""
        return len(self.warnings) > 0
Example¶
JSON
{
"is_valid": false,
"validation_timestamp": "2025-01-15T10:30:00Z",
"schema_name": "TenKInsight",
"schema_version": "1.0",
"errors": [
{
"field_path": "ticker",
"message": "Ticker symbol format is invalid",
"error_type": "FORMAT_ERROR",
"input_value": "apple123",
"suggested_fix": "Use standard ticker format like 'AAPL'"
}
],
"warnings": [
{
"field_path": "confidence_level",
"message": "Confidence level is unusually low",
"error_type": "QUALITY_WARNING",
"input_value": 0.3,
"suggested_fix": "Consider additional analysis to improve confidence"
}
],
"sanitized_data": {
"ticker": "AAPL",
"confidence_level": 0.85
},
"original_data": {
"ticker": "apple123",
"confidence_level": 0.3
},
"data_completeness": 0.85,
"data_quality_score": 0.75,
"validation_mode": "strict",
"validation_rules_applied": [
"ticker_format_validation",
"confidence_range_validation",
"required_fields_validation"
]
}
ReporterInput¶
Input validation for report generation with template and data validation.
Schema Definition¶
Python
class ReporterInput(BaseModel):
    """Input validation for report generation with template and data validation."""

    # Report configuration
    report_type: Literal["stock_analysis", "etf_analysis", "crypto_analysis", "portfolio_review"]
    template_name: str = Field(..., description="Report template to use")
    output_format: Literal["html", "pdf", "json"] = Field(default="html")

    # Data inputs
    analysis_data: Dict[str, Any] = Field(..., description="Analysis results to include")
    portfolio_data: Optional[Dict[str, Any]] = Field(None, description="Portfolio context")

    # Report customization
    include_charts: bool = Field(default=True)
    include_risk_details: bool = Field(default=True)
    include_data_sources: bool = Field(default=True)
    language: str = Field(default="en", description="Report language")

    # Quality requirements
    min_confidence_threshold: float = Field(default=0.5, ge=0.0, le=1.0)
    require_recent_data: bool = Field(default=True)
    max_data_age_hours: int = Field(default=24, ge=1)

    # Validation settings
    strict_validation: bool = Field(default=True)
    allow_partial_data: bool = Field(default=False)

    model_config = {
        "extra": "forbid",
        "str_strip_whitespace": True
    }

    @field_validator('analysis_data')
    @classmethod
    def validate_analysis_data_structure(cls, v: Dict[str, Any]) -> Dict[str, Any]:
        """Require the minimal analysis fields a report needs before accepting the payload."""
        required_fields = ['ticker', 'recommendation', 'confidence_level']
        missing_fields = [field for field in required_fields if field not in v]
        if missing_fields:
            raise ValueError(f'Missing required analysis fields: {missing_fields}')
        return v
Example¶
JSON
{
"report_type": "stock_analysis",
"template_name": "comprehensive_stock_report",
"output_format": "html",
"analysis_data": {
"ticker": "AAPL",
"recommendation": "BUY",
"confidence_level": 0.85,
"price_target": 180.00
},
"portfolio_data": {
"current_allocation": 0.15,
"target_allocation": 0.20
},
"include_charts": true,
"include_risk_details": true,
"include_data_sources": true,
"language": "en",
"min_confidence_threshold": 0.7,
"require_recent_data": true,
"max_data_age_hours": 12,
"strict_validation": true,
"allow_partial_data": false
}
DataQualityAssessment¶
Assessment of data quality for analysis inputs.
Schema Definition¶
Python
class DataQualityIssue(BaseModel):
    """A single data-quality problem detected in an analysis input."""

    # Closed classification of what kind of problem was found.
    issue_type: Literal["MISSING_DATA", "STALE_DATA", "INCONSISTENT_DATA", "LOW_CONFIDENCE", "SOURCE_UNAVAILABLE"]
    severity: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"]
    field_path: str = Field(..., description="Path to affected field")
    description: str = Field(..., description="Issue description")
    impact: str = Field(..., description="Impact on analysis")
    suggested_action: Optional[str] = Field(None, description="Recommended action")

    # CONSISTENCY FIX: every other model in this module forbids unknown
    # fields; this one silently accepted them.
    model_config = {"extra": "forbid"}
class DataQualityAssessment(BaseModel):
    """Assessment of data quality for analysis inputs."""

    # Naive local time — NOTE(review): confirm whether UTC is expected here.
    assessment_timestamp: datetime = Field(default_factory=datetime.now)
    data_source: str = Field(..., description="Source of data being assessed")

    # Overall quality metrics (each normalized to [0, 1])
    overall_quality_score: float = Field(..., ge=0.0, le=1.0)
    completeness_score: float = Field(..., ge=0.0, le=1.0)
    freshness_score: float = Field(..., ge=0.0, le=1.0)
    accuracy_score: float = Field(..., ge=0.0, le=1.0)
    consistency_score: float = Field(..., ge=0.0, le=1.0)

    # Data characteristics (field counts for the assessed payload)
    total_fields: int = Field(..., ge=0)
    populated_fields: int = Field(..., ge=0)
    missing_fields: int = Field(..., ge=0)

    # Freshness analysis (ages are optional — None when not measured)
    newest_data_age: Optional[timedelta] = None
    oldest_data_age: Optional[timedelta] = None
    avg_data_age: Optional[timedelta] = None

    # Quality issues
    issues: List[DataQualityIssue] = Field(default_factory=list)
    critical_issues: int = Field(default=0, ge=0)

    # Recommendations
    usable_for_analysis: bool = Field(..., description="Whether data is suitable for analysis")
    confidence_impact: float = Field(..., ge=0.0, le=1.0, description="Impact on analysis confidence")
    recommended_actions: List[str] = Field(default_factory=list)

    model_config = {"extra": "forbid"}

    @property
    def completion_rate(self) -> float:
        """Fraction of fields populated; 0.0 when there are no fields at all."""
        if self.total_fields == 0:
            return 0.0
        return self.populated_fields / self.total_fields

    @property
    def has_critical_issues(self) -> bool:
        """True when at least one critical issue was counted."""
        return self.critical_issues > 0
Validation Utilities¶
Custom Validators¶
Python
class TickerValidator:
    """Utility class for ticker validation"""

    @staticmethod
    def validate_format(ticker: str) -> bool:
        """Validate ticker format"""
        if not ticker:
            return False
        if len(ticker) > 10:
            # Length limit counts separators as well.
            return False
        # Hyphens and dots are permitted separators; everything left over
        # must be alphanumeric.
        stripped = ticker.replace('-', '').replace('.', '')
        return stripped.isalnum()

    @staticmethod
    def normalize(ticker: str) -> str:
        """Normalize ticker format"""
        # Trim surrounding whitespace, then uppercase.
        return ticker.strip().upper()

    @staticmethod
    def detect_asset_class(ticker: str) -> Optional[str]:
        """Detect asset class from ticker format"""
        # Crypto pairs conventionally end in a quote currency.
        if ticker.endswith(('-USD', 'USDT')):
            return "crypto"
        # Substring match against well-known ETF markers (heuristic).
        for marker in ('ETF', 'SPDR', 'VTI', 'QQQ'):
            if marker in ticker:
                return "etf"
        return "stock"
class ConfidenceValidator:
    """Utility class for confidence validation"""

    @staticmethod
    def validate_range(confidence: float) -> bool:
        """Validate confidence is in valid range"""
        # Scores are normalized to the closed interval [0, 1].
        return 0.0 <= confidence <= 1.0

    @staticmethod
    def assess_quality(confidence: float) -> str:
        """Assess confidence quality"""
        # Thresholds are checked from highest to lowest; first match wins.
        bands = (
            (0.8, "HIGH"),
            (0.6, "MEDIUM"),
            (0.4, "LOW"),
        )
        for threshold, label in bands:
            if confidence >= threshold:
                return label
        return "VERY_LOW"
Usage Examples¶
Ticker Validation¶
Python
from finwiz.schemas.validation import ValidatedTicker
# BUG FIX: ValidationError was referenced in the except clause below but
# never imported, so a failed validation would raise NameError instead of
# being handled. Pydantic raises its own ValidationError on model errors.
from pydantic import ValidationError

# Validate ticker
try:
    validated = ValidatedTicker(
        symbol="AAPL",
        original_input="aapl",
        is_valid=True,
        validation_source="Yahoo Finance",
        data_availability=0.98,
        liquidity_score=1.0
    )
    print(f"Validated ticker: {validated.symbol}")
except ValidationError as e:
    print(f"Validation failed: {e}")
Validation Result Processing¶
Python
from finwiz.schemas.validation import ValidationResult, ValidationError

# Create validation result.
# NOTE(review): ValidationError here is the project's BaseModel, not
# pydantic.ValidationError — mind the name collision if both are imported.
result = ValidationResult(
    is_valid=False,
    schema_name="TenKInsight",
    errors=[
        ValidationError(
            field_path="ticker",
            message="Invalid ticker format",
            error_type="FORMAT_ERROR",
            input_value="invalid123",
            suggested_fix="Use valid ticker symbol"
        )
    ],
    original_data={"ticker": "invalid123"},
    data_completeness=0.5,
    data_quality_score=0.3
)

# Process results: report each error and any suggested correction.
if result.has_errors:
    print(f"Validation failed with {result.error_count} errors:")
    for error in result.errors:
        print(f" - {error.field_path}: {error.message}")
        if error.suggested_fix:
            print(f" Suggestion: {error.suggested_fix}")
Data Quality Assessment¶
Python
from finwiz.schemas.validation import DataQualityAssessment, DataQualityIssue

# Assess data quality: overall scores plus one concrete issue.
assessment = DataQualityAssessment(
    data_source="Yahoo Finance",
    overall_quality_score=0.85,
    completeness_score=0.90,
    freshness_score=0.95,
    accuracy_score=0.80,
    consistency_score=0.85,
    total_fields=20,
    populated_fields=18,
    missing_fields=2,
    issues=[
        DataQualityIssue(
            issue_type="MISSING_DATA",
            severity="MEDIUM",
            field_path="dividend_yield",
            description="Dividend yield data not available",
            impact="Cannot assess dividend income potential",
            suggested_action="Use alternative data source or estimate"
        )
    ],
    usable_for_analysis=True,
    confidence_impact=0.1
)

# Summarize the assessment.
print(f"Data quality score: {assessment.overall_quality_score}")
print(f"Usable for analysis: {assessment.usable_for_analysis}")
Integration with Validation Manager¶
Python
from finwiz.validation import get_validation_manager

# Get validation manager
manager = get_validation_manager()

# Validate crew output.
# NOTE(review): `analysis_data` is assumed to be defined earlier in the
# calling context — this snippet does not create it.
result = manager.validate_crew_output(
    data=analysis_data,
    asset_class="stock",
    analysis_type="ten_k_insight"
)

if result.is_valid:
    # Use sanitized data
    clean_data = result.sanitized_data
    analysis = TenKInsight.model_validate(clean_data)
else:
    # Handle validation errors.
    # NOTE(review): `TenKInsight` and `logger` must also be in scope here.
    for error in result.errors:
        logger.error(f"Validation error: {error.message}")
Related Documentation¶
- Analysis Schemas - Schemas being validated
- Portfolio Schemas - Portfolio validation
- Discovery Schemas - Discovery validation
- Schema Relationships - How validation fits in the system