"""Reasoned[T] generic wrapper for glass-box LLM reasoning.

This module provides a generic wrapper that forces chain-of-thought reasoning
BEFORE the model makes a determination. By placing the 'reasoning' field before
'value' in the Pydantic model, the JSON schema ensures the LLM produces
its reasoning before committing to an answer.

This enables:
1. Glass-box visibility into model reasoning decisions
2. Corpus collection for analyzing reasoning patterns
3. Better calibrated confidence through explicit evidence

Example:
    >>> class ImageAnalysis(BaseModel, ReasonedOutputMixin):
    ...     image_type: Reasoned[Literal["decorative", "informative"]]
    ...     confidence: Reasoned[float]
    ...
    >>> # LLM must produce reasoning before each value
    >>> output = ImageAnalysis(
    ...     image_type=Reasoned(
    ...         reasoning="Large chart with axis labels and data points visible. Contains quantitative information.",
    ...         value="informative"
    ...     ),
    ...     confidence=Reasoned(
    ...         reasoning="Clear image, unambiguous chart structure. High confidence.",
    ...         value=0.95
    ...     )
    ... )
    >>> corpus = output.extract_reasoning_corpus()
"""
from __future__ import annotations
import logging
from typing import Any, Generic, TypeVar
from pydantic import BaseModel, Field, field_validator
# Module-level logger named after this module; used by the reasoning validator
# to emit debug output.
logger = logging.getLogger(__name__)
# Type variable for the payload type carried in the `value` field of Reasoned[T].
T = TypeVar("T")
class Reasoned(BaseModel, Generic[T]):
    """A value paired with the explicit reasoning that preceded it.

    ``reasoning`` is declared before ``value`` so that it also comes first in
    the generated JSON schema, which is what prompts an LLM to write its
    chain-of-thought before committing to an answer.

    The reasoning should be 1-2 sentences (aim for roughly 10-200 characters;
    the enforced bounds are 10-500 characters) that:
    - State the key evidence observed
    - Connect evidence to the conclusion

    Attributes:
        reasoning: 1-2 sentence explanation of how the value was determined.
        value: The actual determined value.
    """

    reasoning: str = Field(
        ...,
        min_length=10,
        max_length=500,  # Allow some flexibility, but encourage brevity
        description=(
            "1-2 sentences explaining how you determined this value. "
            "State key evidence observed, then connect to your conclusion. "
            "Keep it concise - sacrifice grammar for conciseness."
        ),
    )
    value: T = Field(
        ...,
        description="The determined value based on the reasoning above.",
    )

    @field_validator("reasoning")
    @classmethod
    def validate_reasoning_quality(cls, v: str) -> str:
        """Log overly verbose reasoning; never rejects or alters the input.

        Args:
            v: The reasoning string.

        Returns:
            The reasoning string, unchanged.
        """
        # Rough sentence estimate: count terminators followed by a space,
        # plus one for the final sentence.
        sentence_count = v.count(". ") + v.count("! ") + v.count("? ") + 1
        if sentence_count > 3:
            # Lazy %-style arguments: the message is only formatted when
            # DEBUG logging is actually enabled for this logger.
            logger.debug(
                "Verbose reasoning detected (%d sentences): %s...",
                sentence_count,
                v[:100],
            )
        return v

    def __repr__(self) -> str:
        """Return a compact ``Reasoned(reasoning=..., value=...)`` string."""
        return f"Reasoned(reasoning={self.reasoning!r}, value={self.value!r})"
class ReasonedOutputMixin:
    """Mixin providing reasoning extraction utilities for Pydantic models.

    Add this mixin to any Pydantic model that contains Reasoned[T] fields
    to enable extraction of a reasoning corpus for analysis and logging.

    Example:
        >>> class AnalysisOutput(BaseModel, ReasonedOutputMixin):
        ...     layout_type: Reasoned[Literal["single", "multi"]]
        ...     complexity: Reasoned[float]
        ...
        >>> output = AnalysisOutput(...)
        >>> corpus = output.extract_reasoning_corpus()
        >>> # Returns list of dicts with field, reasoning, value, model_class
    """

    def extract_reasoning_corpus(self) -> list[dict[str, Any]]:
        """Extract all Reasoned fields for corpus storage.

        Walks every field declared in the class's ``model_fields`` and
        collects reasoning from:
        - fields that are ``Reasoned`` instances,
        - fields that are lists or tuples whose items are ``Reasoned``
          instances or objects exposing ``extract_reasoning_corpus``,
        - fields that are themselves objects exposing
          ``extract_reasoning_corpus`` (nested models).

        Returns:
            List of dictionaries containing:
            - field: The field name (``name[index]`` for sequence items,
              dotted paths for entries coming from nested models)
            - reasoning: The reasoning string
            - value: The determined value
            - model_class: The name of the class that directly holds the
              Reasoned value
            An empty list is returned when the class has no ``model_fields``.
        """
        # model_fields is read from the class, not the instance.
        model_fields = getattr(type(self), "model_fields", None)
        if not model_fields:
            return []

        owner_name = type(self).__name__

        def collect(path: str, candidate: Any) -> list[dict[str, Any]]:
            """Return corpus entries contributed by one value located at *path*."""
            if isinstance(candidate, Reasoned):
                return [
                    {
                        "field": path,
                        "reasoning": candidate.reasoning,
                        "value": candidate.value,
                        "model_class": owner_name,
                    }
                ]
            # Duck-typed nested models: anything offering the same extractor.
            extract = getattr(candidate, "extract_reasoning_corpus", None)
            if extract is None:
                return []
            nested_entries = extract()
            for entry in nested_entries:
                # Prefix nested paths so each entry is addressable from here.
                entry["field"] = f"{path}.{entry['field']}"
            return nested_entries

        corpus: list[dict[str, Any]] = []
        for field_name in model_fields:
            value = getattr(self, field_name)
            if isinstance(value, (list, tuple)):
                for index, item in enumerate(value):
                    corpus.extend(collect(f"{field_name}[{index}]", item))
            else:
                corpus.extend(collect(field_name, value))
        return corpus
# Public API of this module: the names exported by a star-import.
__all__ = [
"Reasoned",
"ReasonedOutputMixin",
]