Technology & AI
Build a Learning-Powered Emphasis Agent That Learns to Retrieve Relevant Long-Term Memories for Accurate LLM Question Answering

@dataclass
class MemoryItem:
    """A single record stored in the long-term memory bank.

    Each record pairs a natural-language sentence (``text``) with the
    structured fields it was rendered from, so retrieval results can be
    scored against a known entity/slot/value triple.
    """

    # Running integer identifier assigned when the bank is built.
    memory_id: int
    # Domain label of the owning entity (e.g. "robotics"); "noise" for filler.
    topic: str
    # Name of the entity the memory is about; "none" for filler notes.
    entity: str
    # Fact key (e.g. "battery"), or "distractor" for non-answer-bearing notes.
    slot: str
    # Fact value for the slot; "n/a" when the memory is a distractor.
    value: str
    # The sentence that is actually embedded and retrieved.
    text: str
# Entity profiles: each entity has a topic and five slot -> value facts.
_ENTITY_PROFILES = [
    {
        "entity": "Astra",
        "topic": "robotics",
        "facts": {
            "battery": "18 hours",
            "sensor": "LiDAR",
            "country": "Japan",
            "release_year": "2023",
            "specialty": "warehouse navigation",
        },
    },
    {
        "entity": "Orion",
        "topic": "astronomy",
        "facts": {
            "telescope": "infrared array",
            "country": "Chile",
            "discovery_year": "2019",
            "target": "exoplanet atmospheres",
            "aperture": "8 meters",
        },
    },
    {
        "entity": "Vita",
        "topic": "biomedicine",
        "facts": {
            "compound": "VX-17",
            "trial_phase": "Phase II",
            "country": "Canada",
            "target": "inflammatory markers",
            "delivery": "oral capsule",
        },
    },
    {
        "entity": "Nimbus",
        "topic": "climate",
        "facts": {
            "satellite": "polar orbiter",
            "country": "Norway",
            "launch_year": "2022",
            "instrument": "microwave radiometer",
            "mission": "sea ice monitoring",
        },
    },
    {
        "entity": "Atlas",
        "topic": "logistics",
        "facts": {
            "fleet_size": "240 trucks",
            "hub": "Muscat",
            "software": "predictive routing",
            "fuel_policy": "hybrid-first",
            "region": "GCC",
        },
    },
    {
        "entity": "Lumos",
        "topic": "materials",
        "facts": {
            "alloy": "Ti-6Al-4V",
            "process": "laser sintering",
            "density": "4.43 g/cm3",
            "country": "Germany",
            "use_case": "aerospace brackets",
        },
    },
    {
        "entity": "Cedar",
        "topic": "agriculture",
        "facts": {
            "crop": "wheat",
            "irrigation": "drip control",
            "country": "India",
            "yield_gain": "12 percent",
            "soil_sensor": "capacitive probe",
        },
    },
    {
        "entity": "Pulse",
        "topic": "healthcare",
        "facts": {
            "device": "ECG patch",
            "battery": "7 days",
            "country": "USA",
            "connectivity": "Bluetooth Low Energy",
            "use_case": "arrhythmia screening",
        },
    },
]

# Every fact is rendered once per template, giving several paraphrases of
# the same (entity, slot, value) triple.
_PHRASING_TEMPLATES = (
    "{entity} in {topic} uses {value} for {slot}.",
    "The {slot} associated with {entity} is {value}.",
    "{entity} has {slot}: {value}.",
    "For {entity}, the recorded {slot} is {value}.",
    "Reference note: {entity} -> {slot} = {value}.",
)

# Sentences that mention an entity but carry no fact about it.
_DISTRACTOR_TEMPLATES = (
    "{entity} was discussed in a briefing about cross-domain innovation.",
    "{entity} has been compared with several other projects in recent reports.",
    "A summary note mentions {entity} among notable initiatives.",
    "{entity} appears in a high-level update without technical details.",
    "Stakeholders reviewed {entity} in a strategic planning session.",
)

# Entity-free filler notes appended at the end of the bank.
_EXTRA_NOISE = (
    "General note: system maintenance occurred on Tuesday.",
    "A committee discussed budget timelines and operational readiness.",
    "The archive includes summaries of projects across multiple departments.",
    "No relevant technical value is stated in this memory.",
    "A status update mentioned partnerships and future opportunities.",
    "An unrelated note references shipping delays and staffing changes.",
    "Background memo: the team reviewed dashboards and reporting cadence.",
    "This memory contains no answer-bearing facts.",
)


def _entity_records(profile: Dict[str, Any]) -> List[Dict[str, str]]:
    """Return one entity's memory fields (no IDs): facts first, then distractors."""
    entity = profile["entity"]
    topic = profile["topic"]
    records = [
        {
            "topic": topic,
            "entity": entity,
            "slot": slot,
            "value": value,
            "text": template.format(entity=entity, topic=topic, slot=slot, value=value),
        }
        for slot, value in profile["facts"].items()
        for template in _PHRASING_TEMPLATES
    ]
    records.extend(
        {
            "topic": topic,
            "entity": entity,
            "slot": "distractor",
            "value": "n/a",
            "text": template.format(entity=entity),
        }
        for template in _DISTRACTOR_TEMPLATES
    )
    return records


def _noise_records() -> List[Dict[str, str]]:
    """Return the memory fields (no IDs) for the entity-free filler notes."""
    return [
        {
            "topic": "noise",
            "entity": "none",
            "slot": "distractor",
            "value": "n/a",
            "text": text,
        }
        for text in _EXTRA_NOISE
    ]


def build_memory_bank() -> List[MemoryItem]:
    """Build the synthetic long-term memory bank.

    The bank is laid out entity by entity: for each entity, every fact is
    rendered with every phrasing template (slot order, then template order),
    followed by that entity's distractor sentences. The entity-free noise
    notes come last. With the built-in tables this yields
    8 * (5 * 5 + 5) + 8 = 248 memories.

    Returns:
        A new list of ``MemoryItem`` objects whose ``memory_id`` values run
        from 0 upward without gaps, so ``memory_id`` equals the item's
        position in the returned list.
    """
    records: List[Dict[str, str]] = []
    for profile in _ENTITY_PROFILES:
        records.extend(_entity_records(profile))
    records.extend(_noise_records())

    # IDs are assigned in a single pass so they always match list positions.
    return [
        MemoryItem(memory_id=memory_id, **fields)
        for memory_id, fields in enumerate(records)
    ]
# Build the memory bank once at module level and embed every memory's text.
# NOTE(review): embed_texts is defined outside this section; presumably it
# returns one embedding per input text, in the same order — confirm.
memory_bank = build_memory_bank()
memory_texts = [m.text for m in memory_bank]
memory_embeddings = embed_texts(memory_texts)
def build_queries(
    memory_bank: List[MemoryItem],
    rng: "random.Random | None" = None,
) -> List[Dict[str, Any]]:
    """Generate one natural-language question per answer-bearing memory.

    Memories whose ``slot`` is ``"distractor"`` are skipped. For every other
    memory a question pattern is picked at random and filled with the
    memory's entity and slot (underscores in the slot shown as spaces).
    The resulting list is shuffled before it is returned.

    Args:
        memory_bank: Memories to derive questions from.
        rng: Optional random source providing ``choice`` and ``shuffle``
            (e.g. ``random.Random(seed)``) for reproducible output that
            leaves the global generator untouched. When omitted, the
            module-level ``random`` functions are used.

    Returns:
        A shuffled list of dicts with keys ``query_id``, ``query``,
        ``entity``, ``slot``, ``gold_value``, ``gold_memory_id``,
        ``gold_text`` and ``topic``. ``query_id`` values are assigned in
        memory-bank order before shuffling. An empty bank yields ``[]``.
    """
    patterns = [
        "What is the {slot} of {entity}?",
        "Which {slot} does {entity} have?",
        "Tell me the {slot} for {entity}.",
        "Can you recall the {slot} associated with {entity}?",
        "What was recorded as the {slot} of {entity}?",
    ]
    # Fall back to the global generator so existing callers see no change.
    source = random if rng is None else rng

    answerable = (m for m in memory_bank if m.slot != "distractor")
    queries = []
    for qid, memory in enumerate(answerable):
        question = source.choice(patterns).format(
            slot=memory.slot.replace("_", " "),
            entity=memory.entity,
        )
        queries.append({
            "query_id": qid,
            "query": question,
            "entity": memory.entity,
            "slot": memory.slot,
            "gold_value": memory.value,
            "gold_memory_id": memory.memory_id,
            "gold_text": memory.text,
            "topic": memory.topic,
        })

    source.shuffle(queries)
    return queries
# Derive the evaluation questions from the memory bank and embed their text.
# NOTE(review): embed_texts is defined outside this section; presumably it
# returns one embedding per query, aligned with query_texts — confirm.
queries = build_queries(memory_bank)
query_texts = [q["query"] for q in queries]
query_embeddings = embed_texts(query_texts)


