Commit: c53ff60
Parent(s): bea2de8
fix: new embeddings and reranker2
app/services/retrieval.py
CHANGED
@@ -57,10 +57,8 @@ def load_retrieval_artifacts():
         # Load into state
         state.transformed_chunk_embeddings = artifacts['transformed_chunk_embeddings']
         state.chunk_ids_in_order = artifacts['chunk_ids']
-        state.wq_weights = torch.from_numpy(artifacts['wq_weights']).to(device)
         state.temperature = artifacts['temperature'][0]  # Extract scalar from array
         logger.info(f"Successfully loaded {len(state.chunk_ids_in_order)} transformed embeddings.")
-        logger.info(f"Loaded Wq matrix of shape: {state.wq_weights.shape}")
         logger.info(f"Loaded temperature value: {state.temperature:.4f}")
 
     except Exception as e:
@@ -86,7 +84,7 @@ def load_retrieval_artifacts():
 
 # In app/services/retrieval.py
 
-def find_top_gnn_chunks(query_text: str, top_n: int = 100) -> List[Tuple[str, float]]:
+def find_top_gnn_chunks(query_text: str, top_n: int = 200) -> List[Tuple[str, float]]:
     """
     Performs a similarity search that is mathematically identical to the trained model,
     but without loading the GNN itself. It uses pre-transformed embeddings.
@@ -104,9 +102,8 @@ def find_top_gnn_chunks(query_text: str, top_n: int = 100) -> List[Tuple[str, float]]:
         query_text, convert_to_tensor=True, device=state.device
     )
 
-    # 2. Apply
-
-    q_trans_normalized = F.normalize(q_trans, p=2, dim=-1)
+    # 2. Apply query normalization to the query embedding
+    q_trans_normalized = F.normalize(query_embedding.unsqueeze(0), p=2, dim=-1)
 
     # 3. Convert to numpy for fast similarity calculation
     query_vec_np = q_trans_normalized.cpu().numpy()
@@ -115,7 +112,7 @@ def find_top_gnn_chunks(query_text: str, top_n: int = 100) -> List[Tuple[str, float]]:
     similarities = (query_vec_np @ state.transformed_chunk_embeddings.T)[0]
 
     # 5. Apply the learned temperature scaling
-    scaled_similarities = similarities * state.temperature
+    scaled_similarities = similarities * np.exp(state.temperature)
 
     # 6. Combine with IDs, sort, and return top N
     results = list(zip(state.chunk_ids_in_order, scaled_similarities))
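
To see the change end to end, the sketch below reproduces in isolation what the scoring path computes after this commit: L2-normalize the raw query embedding, take dot products against the pre-transformed chunk embeddings, scale by the exponentiated temperature, and return the top-N IDs. The helper name score_chunks and its explicit arguments are hypothetical; the assumptions that the stored chunk embeddings are already L2-normalized and that state.temperature holds a log-temperature (hence np.exp) are inferred from the diff, not confirmed by it.

# Minimal sketch of the post-commit scoring path (names and shapes are assumptions).
import numpy as np
import torch
import torch.nn.functional as F

def score_chunks(query_embedding: torch.Tensor,
                 chunk_embeddings: np.ndarray,   # (num_chunks, dim), assumed pre-normalized
                 chunk_ids: list,
                 temperature: float,
                 top_n: int = 200):
    # Normalize the raw query embedding (no Wq / GNN transform any more)
    q = F.normalize(query_embedding.unsqueeze(0), p=2, dim=-1)
    q_np = q.cpu().numpy()

    # Dot product against every pre-transformed chunk embedding
    similarities = (q_np @ chunk_embeddings.T)[0]

    # Scale by the exponentiated learned temperature, as in the new code
    scaled = similarities * np.exp(temperature)

    # Sort descending and keep the top N (chunk_id, score) pairs
    order = np.argsort(-scaled)[:top_n]
    return [(chunk_ids[i], float(scaled[i])) for i in order]

If the training code learned a log-temperature, exponentiating it at inference keeps the effective scale positive, which would explain the switch from similarities * state.temperature to similarities * np.exp(state.temperature).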
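
A hypothetical call, assuming load_retrieval_artifacts() has already populated state; the query string and output formatting are illustrative only:

# Hypothetical usage of the updated function; the query text is made up.
top_chunks = find_top_gnn_chunks("how does the retrieval pipeline work?", top_n=5)
for chunk_id, score in top_chunks:
    print(f"{chunk_id}: {score:.4f}")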