Spaces:
Running
Running
Commit
·
8ad42f5
1
Parent(s):
8779583
Updated search engine for generating album
Browse files- AI_USAGE_REPORT.txt +359 -0
- cloudzy/agents/image_analyzer.py +1 -1
- cloudzy/ai_utils.py +41 -0
- cloudzy/database.py +4 -0
- cloudzy/routes/photo.py +83 -3
- cloudzy/routes/upload.py +65 -62
- cloudzy/schemas.py +18 -1
- cloudzy/search_engine.py +66 -0
- cloudzy/utils/file_upload_service.py +0 -1
AI_USAGE_REPORT.txt
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
================================================================================
|
| 2 |
+
AI USAGE REPORT
|
| 3 |
+
Cloudzy AI Challenge - Photo Album Management System
|
| 4 |
+
================================================================================
|
| 5 |
+
|
| 6 |
+
PROJECT OVERVIEW
|
| 7 |
+
================
|
| 8 |
+
This project implements an AI-enhanced photo management system that uses machine learning
|
| 9 |
+
models for generating embeddings and AI summaries for photo clusters. The system allows
|
| 10 |
+
users to upload photos, search by similarity, and organize them into meaningful albums
|
| 11 |
+
with AI-generated summaries.
|
| 12 |
+
|
| 13 |
+
================================================================================
|
| 14 |
+
1. WHERE AND HOW AI WAS USED
|
| 15 |
+
================================================================================
|
| 16 |
+
|
| 17 |
+
A. IMAGE EMBEDDING GENERATION
|
| 18 |
+
Location: cloudzy/ai_utils.py - ImageEmbeddingGenerator class
|
| 19 |
+
Purpose: Convert photo metadata (tags, description, caption) into 1024-dimensional
|
| 20 |
+
vector embeddings for similarity search
|
| 21 |
+
|
| 22 |
+
Model Used:
|
| 23 |
+
- Provider: Hugging Face Hub (InferenceClient)
|
| 24 |
+
- Model Name: intfloat/multilingual-e5-large
|
| 25 |
+
- Endpoint: feature_extraction
|
| 26 |
+
|
| 27 |
+
How It's Used:
|
| 28 |
+
1. User uploads photo with metadata (tags, caption, description)
|
| 29 |
+
2. Metadata is combined into a single text string
|
| 30 |
+
3. Text is sent to HF model via InferenceClient.feature_extraction()
|
| 31 |
+
4. Model returns 1024-d embedding vector
|
| 32 |
+
5. Embedding is stored in FAISS index for similarity search
|
| 33 |
+
|
| 34 |
+
Integration Points:
|
| 35 |
+
- cloudzy/routes/upload.py: Called during photo upload
|
| 36 |
+
- cloudzy/search_engine.py: Used for vector similarity search
|
| 37 |
+
- Database: Embeddings are converted from numpy arrays to plain lists (embedding.tolist()) and stored on the Photo record via set_embedding()
|
| 38 |
+
|
| 39 |
+
B. AI SUMMARY GENERATION
|
| 40 |
+
Location: cloudzy/ai_utils.py - TextSummarizer class
|
| 41 |
+
Purpose: Generate meaningful summaries of photo clusters based on actual photo metadata
|
| 42 |
+
|
| 43 |
+
Model Used:
|
| 44 |
+
- Provider: Hugging Face Hub (InferenceClient)
|
| 45 |
+
- Model Name: facebook/bart-large-cnn
|
| 46 |
+
- Endpoint: summarization
|
| 47 |
+
|
| 48 |
+
How It's Used:
|
| 49 |
+
1. User requests /albums endpoint
|
| 50 |
+
2. System retrieves all photo clusters
|
| 51 |
+
3. For each cluster, collects all captions and tags from photos
|
| 52 |
+
4. Combined metadata is sent to BART summarization model
|
| 53 |
+
5. Model generates concise summary (e.g., "A collection of indoor photos featuring...")
|
| 54 |
+
6. Summary replaces placeholder "Cluster of similar photos" in response
|
| 55 |
+
|
| 56 |
+
Integration Points:
|
| 57 |
+
- cloudzy/routes/photo.py: get_albums() endpoint
|
| 58 |
+
- Response Schema: Pydantic AlbumItem model
|
| 59 |
+
- Fallback: If summarization fails, returns truncated text
|
| 60 |
+
|
| 61 |
+
================================================================================
|
| 62 |
+
2. PROMPTS AND MODEL INPUTS
|
| 63 |
+
================================================================================
|
| 64 |
+
|
| 65 |
+
A. IMAGE EMBEDDING INPUTS
|
| 66 |
+
Raw Input Format:
|
| 67 |
+
tags: List[str] = ["nature", "sunset", "beach"]
|
| 68 |
+
description: str = "A beautiful sunset at the beach with waves"
|
| 69 |
+
caption: str = "Sunset beach scene"
|
| 70 |
+
|
| 71 |
+
Processing:
|
| 72 |
+
Combined Text = " ".join(tags) + " " + description + " " + caption
|
| 73 |
+
Example: "nature sunset beach A beautiful sunset at the beach with waves Sunset beach scene"
|
| 74 |
+
|
| 75 |
+
Model Request (Hugging Face InferenceClient):
|
| 76 |
+
client.feature_extraction(
|
| 77 |
+
text=combined_text,
|
| 78 |
+
model="intfloat/multilingual-e5-large"
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
Expected Output:
|
| 82 |
+
- Type: List of floats (1024 dimensions)
|
| 83 |
+
- Converted to: numpy.ndarray of shape (1024,)
|
| 84 |
+
- Data type: float32
|
| 85 |
+
- Usage: Stored in FAISS index for vector similarity search
|
| 86 |
+
|
| 87 |
+
B. SUMMARIZATION INPUTS
|
| 88 |
+
Raw Input Format:
|
| 89 |
+
For each album cluster, combine all photo metadata:
|
| 90 |
+
texts = []
|
| 91 |
+
for photo in cluster_photos:
|
| 92 |
+
texts.append(photo.caption)
|
| 93 |
+
texts.extend(photo.tags)
|
| 94 |
+
combined_input = " ".join(texts)
|
| 95 |
+
|
| 96 |
+
Example Input:
|
| 97 |
+
"Beach sunset waves ocean Sunset at the ocean view Nature landscape
|
| 98 |
+
Seascape beautiful A sunset scene with ocean waves A scenic beach view"
|
| 99 |
+
|
| 100 |
+
Model Request (Hugging Face InferenceClient):
|
| 101 |
+
client.summarization(
|
| 102 |
+
text=combined_input,
|
| 103 |
+
model="facebook/bart-large-cnn"
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
Expected Output:
|
| 107 |
+
- Type: Structured result carrying a 'summary_text' field (a SummarizationOutput object; list and dict forms are also handled)
|
| 108 |
+
- Example: "A collection of beach and sunset photographs featuring scenic ocean views"
|
| 109 |
+
- Processing: Extract summary_text from returned object
|
| 110 |
+
- Type Conversion: Ensure string type for Pydantic validation
|
| 111 |
+
|
| 112 |
+
================================================================================
|
| 113 |
+
3. HOW MODEL OUTPUTS WERE REFINED
|
| 114 |
+
================================================================================
|
| 115 |
+
|
| 116 |
+
A. EMBEDDING OUTPUT REFINEMENT
|
| 117 |
+
Issue Encountered:
|
| 118 |
+
- Expected shape: (512,) per documentation
|
| 119 |
+
- Actual shape: (1024,) from model
|
| 120 |
+
- Initial state: the validation already checked for 1024 dimensions, but the accompanying comment still said 512
|
| 121 |
+
|
| 122 |
+
Resolution:
|
| 123 |
+
- Updated validation to expect 1024 dimensions (correct model behavior)
|
| 124 |
+
- Final check: if embedding.shape[0] != 1024: raise ValueError
|
| 125 |
+
- Added type casting: np.array(result, dtype=np.float32).reshape(-1)
|
| 126 |
+
- reshape(-1) flattens the model output into a 1-D array
|
| 127 |
+
|
| 128 |
+
Code Refinement (ai_utils.py, lines 50-62):
|
| 129 |
+
def _embed_text(self, text: str) -> np.ndarray:
|
| 130 |
+
result = self.client.feature_extraction(text, model=self.model_name)
|
| 131 |
+
embedding = np.array(result, dtype=np.float32).reshape(-1)
|
| 132 |
+
if embedding.shape[0] != 1024:
|
| 133 |
+
raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
|
| 134 |
+
return embedding
|
| 135 |
+
|
| 136 |
+
B. SUMMARIZATION OUTPUT REFINEMENT
|
| 137 |
+
Issue Encountered:
|
| 138 |
+
- Pydantic validation error: "Input should be a valid string"
|
| 139 |
+
- Received: SummarizationOutput object instead of string
|
| 140 |
+
- Root Cause: client.summarization() returns structured object, not string
|
| 141 |
+
|
| 142 |
+
Resolution:
|
| 143 |
+
- Added type-safe extraction logic
|
| 144 |
+
- Implemented multiple fallback formats:
|
| 145 |
+
1. If list: Extract first element's 'summary_text' field
|
| 146 |
+
2. If dict: Get 'summary_text' field directly
|
| 147 |
+
3. Fallback: Convert to string
|
| 148 |
+
|
| 149 |
+
Code Refinement (ai_utils.py, lines 90-100):
|
| 150 |
+
result = self.client.summarization(text, model=self.model_name)
|
| 151 |
+
|
| 152 |
+
# Extract the summary text from the result object
|
| 153 |
+
if isinstance(result, list) and len(result) > 0:
|
| 154 |
+
return result[0].get("summary_text", str(result[0]))
|
| 155 |
+
elif isinstance(result, dict):
|
| 156 |
+
return result.get("summary_text", str(result))
|
| 157 |
+
else:
|
| 158 |
+
return str(result)
|
| 159 |
+
|
| 160 |
+
C. ERROR HANDLING AND DEFAULTS
|
| 161 |
+
Embedding Generation:
|
| 162 |
+
- Validation ensures exact dimension match
|
| 163 |
+
- Raises clear error if dimension mismatch
|
| 164 |
+
- Prevents downstream vector search issues
|
| 165 |
+
|
| 166 |
+
Summarization:
|
| 167 |
+
- Try-except block with graceful fallback
|
| 168 |
+
- Fallback: Returns "Collection: " followed by the first 80 characters of the input and a trailing "..."
|
| 169 |
+
- Empty text handling: Returns default "Album of photos"
|
| 170 |
+
- Ensures robustness when HF API is unavailable
|
| 171 |
+
|
| 172 |
+
================================================================================
|
| 173 |
+
4. MANUAL VS AI-GENERATED PARTS
|
| 174 |
+
================================================================================
|
| 175 |
+
|
| 176 |
+
MANUAL PARTS (100% Developer-Written)
|
| 177 |
+
====================================
|
| 178 |
+
✓ Database schema and models
|
| 179 |
+
- cloudzy/models.py: SQLModel (SQLAlchemy-based) Photo model
|
| 180 |
+
- cloudzy/database.py: Database connection and session management
|
| 181 |
+
|
| 182 |
+
✓ API Route Handlers
|
| 183 |
+
- cloudzy/routes/photo.py: All endpoint logic
|
| 184 |
+
- cloudzy/routes/upload.py: File upload handling
|
| 185 |
+
- cloudzy/routes/search.py: Search endpoint implementation
|
| 186 |
+
|
| 187 |
+
✓ File Management
|
| 188 |
+
- cloudzy/utils/file_upload_service.py: Upload service
|
| 189 |
+
- cloudzy/utils/file_utils.py: File utilities
|
| 190 |
+
|
| 191 |
+
✓ Data Serialization
|
| 192 |
+
- cloudzy/schemas.py: Pydantic models and validation
|
| 193 |
+
|
| 194 |
+
✓ Search Engine Implementation
|
| 195 |
+
- cloudzy/search_engine.py: FAISS vector search logic
|
| 196 |
+
- Distance calculation and result ranking
|
| 197 |
+
|
| 198 |
+
✓ Application Configuration
|
| 199 |
+
- app.py: FastAPI app setup
|
| 200 |
+
- Dockerfile: Containerization
|
| 201 |
+
- requirements.txt: Dependencies
|
| 202 |
+
|
| 203 |
+
HYBRID PARTS (Manual Integration + AI Models)
|
| 204 |
+
==============================================
|
| 205 |
+
✓ ImageEmbeddingGenerator Class
|
| 206 |
+
- Manual: Class structure, API client initialization
|
| 207 |
+
- Manual: Error handling and validation logic
|
| 208 |
+
- Manual: Type conversion and reshaping
|
| 209 |
+
- AI: Feature extraction from HF model
|
| 210 |
+
- Result: Text → 1024-d vector embeddings
|
| 211 |
+
|
| 212 |
+
✓ TextSummarizer Class
|
| 213 |
+
- Manual: Class structure, API client initialization
|
| 214 |
+
- Manual: Output parsing and extraction logic
|
| 215 |
+
- Manual: Error handling and fallbacks
|
| 216 |
+
- Manual: Empty text handling
|
| 217 |
+
- AI: Summary generation from combined text
|
| 218 |
+
- Result: Multi-sentence text → concise summary
|
| 219 |
+
|
| 220 |
+
✓ Album Summary Integration (photo.py)
|
| 221 |
+
- Manual: Cluster iteration and photo data collection
|
| 222 |
+
- Manual: Text concatenation logic
|
| 223 |
+
- Manual: Response structure and schema mapping
|
| 224 |
+
- AI: Summary generation
|
| 225 |
+
- Result: Photo cluster → meaningful album summary
|
| 226 |
+
|
| 227 |
+
AI-GENERATED PARTS
|
| 228 |
+
==================
|
| 229 |
+
✓ Embedding vectors
|
| 230 |
+
- Generated by: intfloat/multilingual-e5-large
|
| 231 |
+
- Content: Semantic representation of photo metadata
|
| 232 |
+
- Used for: Similarity search and clustering
|
| 233 |
+
|
| 234 |
+
✓ Album summaries
|
| 235 |
+
- Generated by: facebook/bart-large-cnn
|
| 236 |
+
- Content: Concise description of photo cluster themes
|
| 237 |
+
- Used for: Album display and description
|
| 238 |
+
|
| 239 |
+
✓ Model-specific responses
|
| 240 |
+
- Output format: Determined by HF models
|
| 241 |
+
- Processing: Handled by manual extraction code
|
| 242 |
+
|
| 243 |
+
================================================================================
|
| 244 |
+
5. DEVELOPMENT PROCESS AND DECISIONS
|
| 245 |
+
================================================================================
|
| 246 |
+
|
| 247 |
+
DECISION 1: Model Selection
|
| 248 |
+
Manual Decision: Why facebook/bart-large-cnn?
|
| 249 |
+
- Reasons:
|
| 250 |
+
* Pre-trained on CNN/DailyMail summarization corpus
|
| 251 |
+
* Optimized for multi-sentence summarization
|
| 252 |
+
* Fast inference through Hugging Face API
|
| 253 |
+
* Produces concise, abstractive summaries
|
| 254 |
+
|
| 255 |
+
Alternative considered: facebook/bart-base (smaller, faster but lower quality)
|
| 256 |
+
|
| 257 |
+
DECISION 2: Embedding Dimension Resolution
|
| 258 |
+
Manual Decision: Accept 1024-d embeddings (not 512-d)
|
| 259 |
+
- Reason:
|
| 260 |
+
* intfloat/multilingual-e5-large actually produces 1024 dimensions
|
| 261 |
+
* Better semantic representation than 512-d
|
| 262 |
+
* FAISS index configured for 1024-d vectors
|
| 263 |
+
* Updated validation to reflect actual model output
|
| 264 |
+
|
| 265 |
+
DECISION 3: Error Handling Strategy
|
| 266 |
+
Manual Decision: Graceful degradation with fallbacks
|
| 267 |
+
- Implementation:
|
| 268 |
+
* Try summarization first
|
| 269 |
+
* If fails, return truncated text
|
| 270 |
+
* If text is empty, return default message
|
| 271 |
+
* Ensures endpoint never fails due to AI API issues
|
| 272 |
+
|
| 273 |
+
DECISION 4: Output Extraction
|
| 274 |
+
Manual Decision: Flexible type handling for model output
|
| 275 |
+
- Implementation:
|
| 276 |
+
* Handle both list and dict return formats
|
| 277 |
+
* Extract 'summary_text' field when available
|
| 278 |
+
* Fallback to string conversion
|
| 279 |
+
* Ensures compatibility with different API versions
|
| 280 |
+
|
| 281 |
+
================================================================================
|
| 282 |
+
6. TESTING AND VALIDATION
|
| 283 |
+
================================================================================
|
| 284 |
+
|
| 285 |
+
Validation Points:
|
| 286 |
+
✓ Embedding shape validation (must be 1024-d)
|
| 287 |
+
✓ Type conversion to float32
|
| 288 |
+
✓ Summary extraction and string conversion
|
| 289 |
+
✓ Pydantic schema validation (AlbumItem requires string album_summary)
|
| 290 |
+
✓ Error handling and fallbacks
|
| 291 |
+
|
| 292 |
+
Testing Done:
|
| 293 |
+
✓ Manual endpoint testing with sample photos
|
| 294 |
+
✓ Verified embedding shape and type
|
| 295 |
+
✓ Tested summarization with various input lengths
|
| 296 |
+
✓ Validated API error handling
|
| 297 |
+
✓ Checked Pydantic schema compliance
|
| 298 |
+
|
| 299 |
+
================================================================================
|
| 300 |
+
7. ENVIRONMENT CONFIGURATION
|
| 301 |
+
================================================================================
|
| 302 |
+
|
| 303 |
+
Required Environment Variables:
|
| 304 |
+
- HF_TOKEN_1: Hugging Face API token (for authentication; TextSummarizer reads it as os.environ["HF_TOKEN_1"])
|
| 305 |
+
Location: Set in .env file
|
| 306 |
+
Usage: InferenceClient initialization
|
| 307 |
+
Scope: Both ImageEmbeddingGenerator and TextSummarizer
|
| 308 |
+
|
| 309 |
+
API Access:
|
| 310 |
+
- Provider: Hugging Face Inference API
|
| 311 |
+
- Authentication: Token-based via the HF_TOKEN_1 environment variable
|
| 312 |
+
- Rate Limiting: Subject to HF plan limits
|
| 313 |
+
- Fallback: When unavailable, gracefully returns truncated text
|
| 314 |
+
|
| 315 |
+
================================================================================
|
| 316 |
+
8. PERFORMANCE CONSIDERATIONS
|
| 317 |
+
================================================================================
|
| 318 |
+
|
| 319 |
+
Current Implementation:
|
| 320 |
+
- Summarization called per album cluster (on-demand)
|
| 321 |
+
- Embedding generation per photo upload
|
| 322 |
+
- FAISS vector search (fast, local)
|
| 323 |
+
|
| 324 |
+
Potential Optimizations:
|
| 325 |
+
✓ Cache summaries in database (reduce API calls)
|
| 326 |
+
✓ Batch embedding generation for multiple uploads
|
| 327 |
+
✓ Implement summary caching with TTL
|
| 328 |
+
✓ Consider async processing for large clusters
|
| 329 |
+
|
| 330 |
+
Current Trade-offs:
|
| 331 |
+
- Speed vs Freshness: Summaries generated on-demand (fresh, slower)
|
| 332 |
+
- Accuracy vs Cost: Full text summarization vs cached summaries
|
| 333 |
+
|
| 334 |
+
================================================================================
|
| 335 |
+
SUMMARY
|
| 336 |
+
================================================================================
|
| 337 |
+
|
| 338 |
+
This project demonstrates responsible AI integration:
|
| 339 |
+
|
| 340 |
+
1. Clear Separation: Manual development (infrastructure, logic) vs AI (models)
|
| 341 |
+
2. Error Handling: Graceful degradation when AI services unavailable
|
| 342 |
+
3. Transparency: Documented model choices and output processing
|
| 343 |
+
4. Flexibility: Handle various model output formats
|
| 344 |
+
5. Validation: Schema validation ensures data integrity
|
| 345 |
+
6. Integration: AI models complement, not replace, core functionality
|
| 346 |
+
|
| 347 |
+
AI Value Added:
|
| 348 |
+
- Semantic search capabilities (embeddings)
|
| 349 |
+
- Automated summary generation (reduces manual effort)
|
| 350 |
+
- Better user experience (meaningful album descriptions)
|
| 351 |
+
|
| 352 |
+
Human Involvement:
|
| 353 |
+
- System design and architecture
|
| 354 |
+
- Error handling and edge cases
|
| 355 |
+
- API integration and data processing
|
| 356 |
+
- Schema definition and validation
|
| 357 |
+
- Deployment and configuration
|
| 358 |
+
|
| 359 |
+
================================================================================
|
cloudzy/agents/image_analyzer.py
CHANGED
|
@@ -42,7 +42,7 @@ Describe this image in the following exact format:
|
|
| 42 |
|
| 43 |
result: {
|
| 44 |
"tags": [list of tags related to the image],
|
| 45 |
-
"description": "a
|
| 46 |
"caption": "a short description for the image"
|
| 47 |
}
|
| 48 |
"""
|
|
|
|
| 42 |
|
| 43 |
result: {
|
| 44 |
"tags": [list of tags related to the image],
|
| 45 |
+
"description": "a 5-line descriptive description for the image",
|
| 46 |
"caption": "a short description for the image"
|
| 47 |
}
|
| 48 |
"""
|
cloudzy/ai_utils.py
CHANGED
|
@@ -61,6 +61,47 @@ class ImageEmbeddingGenerator:
|
|
| 61 |
raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
|
| 62 |
return embedding
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Example usage:
|
| 65 |
if __name__ == "__main__":
|
| 66 |
generator = ImageEmbeddingGenerator()
|
|
|
|
| 61 |
raise ValueError(f"Expected embedding of size 1024, got {embedding.shape[0]}")
|
| 62 |
return embedding
|
| 63 |
|
| 64 |
+
|
| 65 |
+
class TextSummarizer:
|
| 66 |
+
def __init__(self, model_name: str = "facebook/bart-large-cnn"):
|
| 67 |
+
"""
|
| 68 |
+
Initialize the text summarizer with a Hugging Face model.
|
| 69 |
+
"""
|
| 70 |
+
self.client = InferenceClient(
|
| 71 |
+
provider="hf-inference",
|
| 72 |
+
api_key=os.environ["HF_TOKEN_1"],
|
| 73 |
+
)
|
| 74 |
+
self.model_name = model_name
|
| 75 |
+
|
| 76 |
+
def summarize(self, text: str) -> str:
|
| 77 |
+
"""
|
| 78 |
+
Generate a summary of the given text.
|
| 79 |
+
|
| 80 |
+
Args:
|
| 81 |
+
text: Text to summarize
|
| 82 |
+
|
| 83 |
+
Returns:
|
| 84 |
+
summary: Generated summary string
|
| 85 |
+
"""
|
| 86 |
+
if not text or text.strip() == "":
|
| 87 |
+
return "Album of photos"
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
result = self.client.summarization(
|
| 91 |
+
text,
|
| 92 |
+
model=self.model_name,
|
| 93 |
+
)
|
| 94 |
+
# Extract the summary text from the result object
|
| 95 |
+
if isinstance(result, list) and len(result) > 0:
|
| 96 |
+
return result[0].get("summary_text", str(result[0]))
|
| 97 |
+
elif isinstance(result, dict):
|
| 98 |
+
return result.get("summary_text", str(result))
|
| 99 |
+
else:
|
| 100 |
+
return str(result)
|
| 101 |
+
except Exception as e:
|
| 102 |
+
# Fallback if summarization fails
|
| 103 |
+
return f"Collection: {text[:80]}..."
|
| 104 |
+
|
| 105 |
# Example usage:
|
| 106 |
if __name__ == "__main__":
|
| 107 |
generator = ImageEmbeddingGenerator()
|
cloudzy/database.py
CHANGED
|
@@ -14,6 +14,10 @@ engine = create_engine(
|
|
| 14 |
connect_args=connect_args,
|
| 15 |
)
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def create_db_and_tables():
|
| 19 |
"""Create all database tables"""
|
|
|
|
| 14 |
connect_args=connect_args,
|
| 15 |
)
|
| 16 |
|
| 17 |
+
# Session factory for manual session creation
|
| 18 |
+
def SessionLocal():
|
| 19 |
+
return Session(engine)
|
| 20 |
+
|
| 21 |
|
| 22 |
def create_db_and_tables():
|
| 23 |
"""Create all database tables"""
|
cloudzy/routes/photo.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
"""Photo retrieval endpoints"""
|
| 2 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 3 |
from sqlmodel import Session, select
|
|
|
|
| 4 |
|
| 5 |
from cloudzy.database import get_session
|
| 6 |
from cloudzy.models import Photo
|
| 7 |
-
from cloudzy.schemas import PhotoDetailResponse
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
router = APIRouter(tags=["photos"])
|
| 10 |
|
|
@@ -25,9 +29,12 @@ async def get_photo(
|
|
| 25 |
if not photo:
|
| 26 |
raise HTTPException(status_code=404, detail=f"Photo {photo_id} not found")
|
| 27 |
|
|
|
|
|
|
|
| 28 |
return PhotoDetailResponse(
|
| 29 |
id=photo.id,
|
| 30 |
filename=photo.filename,
|
|
|
|
| 31 |
tags=photo.get_tags(),
|
| 32 |
caption=photo.caption,
|
| 33 |
embedding=photo.get_embedding(),
|
|
@@ -55,15 +62,88 @@ async def list_photos(
|
|
| 55 |
|
| 56 |
statement = select(Photo).offset(skip).limit(limit)
|
| 57 |
photos = session.exec(statement).all()
|
|
|
|
|
|
|
| 58 |
|
| 59 |
return [
|
| 60 |
PhotoDetailResponse(
|
| 61 |
id=photo.id,
|
| 62 |
filename=photo.filename,
|
|
|
|
| 63 |
tags=photo.get_tags(),
|
| 64 |
caption=photo.caption,
|
| 65 |
embedding=photo.get_embedding(),
|
| 66 |
created_at=photo.created_at,
|
| 67 |
)
|
| 68 |
for photo in photos
|
| 69 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Photo retrieval endpoints"""
|
| 2 |
+
from fastapi import APIRouter, Depends, HTTPException,Query
|
| 3 |
from sqlmodel import Session, select
|
| 4 |
+
import numpy as np
|
| 5 |
|
| 6 |
from cloudzy.database import get_session
|
| 7 |
from cloudzy.models import Photo
|
| 8 |
+
from cloudzy.schemas import PhotoDetailResponse,AlbumsResponse,PhotoItem,AlbumItem
|
| 9 |
+
from cloudzy.search_engine import SearchEngine
|
| 10 |
+
from cloudzy.ai_utils import TextSummarizer
|
| 11 |
+
import os
|
| 12 |
|
| 13 |
router = APIRouter(tags=["photos"])
|
| 14 |
|
|
|
|
| 29 |
if not photo:
|
| 30 |
raise HTTPException(status_code=404, detail=f"Photo {photo_id} not found")
|
| 31 |
|
| 32 |
+
APP_DOMAIN = os.getenv("APP_DOMAIN")
|
| 33 |
+
|
| 34 |
return PhotoDetailResponse(
|
| 35 |
id=photo.id,
|
| 36 |
filename=photo.filename,
|
| 37 |
+
image_url = f"{APP_DOMAIN}uploads/{photo.filename}",
|
| 38 |
tags=photo.get_tags(),
|
| 39 |
caption=photo.caption,
|
| 40 |
embedding=photo.get_embedding(),
|
|
|
|
| 62 |
|
| 63 |
statement = select(Photo).offset(skip).limit(limit)
|
| 64 |
photos = session.exec(statement).all()
|
| 65 |
+
|
| 66 |
+
APP_DOMAIN = os.getenv("APP_DOMAIN")
|
| 67 |
|
| 68 |
return [
|
| 69 |
PhotoDetailResponse(
|
| 70 |
id=photo.id,
|
| 71 |
filename=photo.filename,
|
| 72 |
+
image_url = f"{APP_DOMAIN}uploads/{photo.filename}",
|
| 73 |
tags=photo.get_tags(),
|
| 74 |
caption=photo.caption,
|
| 75 |
embedding=photo.get_embedding(),
|
| 76 |
created_at=photo.created_at,
|
| 77 |
)
|
| 78 |
for photo in photos
|
| 79 |
+
]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@router.get("/albums", response_model=AlbumsResponse)
|
| 83 |
+
async def get_albums(
|
| 84 |
+
top_k: int = Query(5, ge=2, le=50),
|
| 85 |
+
session: Session = Depends(get_session),
|
| 86 |
+
):
|
| 87 |
+
"""
|
| 88 |
+
Create albums of semantically similar photos.
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
search_engine = SearchEngine()
|
| 92 |
+
albums_ids = search_engine.create_albums(top_k=top_k)
|
| 93 |
+
APP_DOMAIN = os.getenv("APP_DOMAIN") or "http://127.0.0.1:8000/"
|
| 94 |
+
summarizer = TextSummarizer()
|
| 95 |
+
|
| 96 |
+
albums_response = []
|
| 97 |
+
|
| 98 |
+
for album_ids in albums_ids:
|
| 99 |
+
# Query all photos in this album in one go
|
| 100 |
+
statement = select(Photo).where(Photo.id.in_(album_ids))
|
| 101 |
+
photos = session.exec(statement).all()
|
| 102 |
+
|
| 103 |
+
# Build a dict for fast lookup
|
| 104 |
+
photo_lookup = {photo.id: photo for photo in photos}
|
| 105 |
+
|
| 106 |
+
album_photos = []
|
| 107 |
+
album_descriptions = [] # Collect captions and tags for summary
|
| 108 |
+
|
| 109 |
+
for pid in album_ids:
|
| 110 |
+
photo = photo_lookup.get(pid)
|
| 111 |
+
if not photo:
|
| 112 |
+
continue
|
| 113 |
+
|
| 114 |
+
# Find distance from FAISS search
|
| 115 |
+
embedding = photo.get_embedding()
|
| 116 |
+
if not embedding:
|
| 117 |
+
continue
|
| 118 |
+
|
| 119 |
+
query_embedding = np.array(embedding).astype(np.float32).reshape(1, -1)
|
| 120 |
+
distances, ids = search_engine.index.search(query_embedding, top_k)
|
| 121 |
+
distance_val = next((d for i, d in zip(ids[0], distances[0]) if i == pid), 0.0)
|
| 122 |
+
|
| 123 |
+
album_photos.append(
|
| 124 |
+
PhotoItem(
|
| 125 |
+
photo_id=photo.id,
|
| 126 |
+
filename=photo.filename,
|
| 127 |
+
image_url=f"{APP_DOMAIN}uploads/{photo.filename}",
|
| 128 |
+
tags=photo.get_tags(),
|
| 129 |
+
caption=photo.caption,
|
| 130 |
+
distance=float(distance_val),
|
| 131 |
+
)
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# Collect descriptions for album summary
|
| 135 |
+
if photo.caption:
|
| 136 |
+
album_descriptions.append(photo.caption)
|
| 137 |
+
tags = photo.get_tags()
|
| 138 |
+
if tags:
|
| 139 |
+
album_descriptions.append(" ".join(tags))
|
| 140 |
+
|
| 141 |
+
# Generate album summary from compiled descriptions
|
| 142 |
+
combined_description = " ".join(album_descriptions)
|
| 143 |
+
album_summary = summarizer.summarize(combined_description)
|
| 144 |
+
|
| 145 |
+
albums_response.append(
|
| 146 |
+
AlbumItem(album_summary=album_summary, album=album_photos)
|
| 147 |
+
)
|
| 148 |
+
|
| 149 |
+
return albums_response
|
cloudzy/routes/upload.py
CHANGED
|
@@ -62,6 +62,55 @@ def validate_image_file(filename: str) -> bool:
|
|
| 62 |
"""Check if file has valid image extension"""
|
| 63 |
return Path(filename).suffix.lower() in ALLOWED_EXTENSIONS
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
@router.post("/upload", response_model=UploadResponse)
|
| 67 |
async def upload_photo(
|
|
@@ -69,18 +118,7 @@ async def upload_photo(
|
|
| 69 |
session: Session = Depends(get_session),
|
| 70 |
background_tasks: BackgroundTasks = None,
|
| 71 |
):
|
| 72 |
-
|
| 73 |
-
Upload a photo and analyze it with AI.
|
| 74 |
-
|
| 75 |
-
- Validates file type
|
| 76 |
-
- Saves file to disk
|
| 77 |
-
- Generates tags, caption, and embedding
|
| 78 |
-
- Stores metadata in database
|
| 79 |
-
- Indexes embedding in FAISS
|
| 80 |
-
|
| 81 |
-
Returns: Photo metadata with ID
|
| 82 |
-
"""
|
| 83 |
-
# Validate file
|
| 84 |
if not file.filename:
|
| 85 |
raise HTTPException(status_code=400, detail="No filename provided")
|
| 86 |
|
|
@@ -90,79 +128,44 @@ async def upload_photo(
|
|
| 90 |
detail=f"Invalid file type. Allowed: {', '.join(ALLOWED_EXTENSIONS)}"
|
| 91 |
)
|
| 92 |
|
| 93 |
-
# Read file content
|
| 94 |
content = await file.read()
|
| 95 |
if not content:
|
| 96 |
raise HTTPException(status_code=400, detail="Empty file")
|
| 97 |
|
| 98 |
-
# Save file to disk
|
| 99 |
saved_filename = save_uploaded_file(content, file.filename)
|
| 100 |
filepath = f"uploads/{saved_filename}"
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
try:
|
| 106 |
uploader = ImgBBUploader(expiration=600)
|
| 107 |
image_url = uploader.upload(filepath)
|
| 108 |
except Exception as e:
|
| 109 |
raise HTTPException(status_code=500, detail=f"Image upload failed: {str(e)}")
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
try:
|
| 114 |
-
|
| 115 |
-
describer = ImageDescriber()
|
| 116 |
-
# result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_1_20251024_064435_667.jpg")
|
| 117 |
-
# result = describer.describe_image("https://userx2000-cloudzy-ai-challenge.hf.space/uploads/img_2_20251024_082115_102.jpeg")
|
| 118 |
-
result = describer.describe_image(image_url)
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
except Exception as e:
|
| 122 |
-
raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
|
| 123 |
-
|
| 124 |
|
| 125 |
APP_DOMAIN = os.getenv("APP_DOMAIN")
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
# Generate AI analysis
|
| 131 |
-
tags = result.get("tags", [])
|
| 132 |
-
caption = result.get("caption", "")
|
| 133 |
-
description = result.get("description", "")
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
generator = ImageEmbeddingGenerator()
|
| 138 |
-
embedding = generator.generate_embedding(tags, description, caption)
|
| 139 |
-
|
| 140 |
-
# np.save("embedding_2.npy", embedding)
|
| 141 |
-
# embedding = np.load("embedding_2.npy")
|
| 142 |
-
|
| 143 |
-
# Create photo record
|
| 144 |
photo = Photo(
|
| 145 |
filename=saved_filename,
|
| 146 |
filepath=filepath,
|
| 147 |
-
caption=
|
| 148 |
)
|
| 149 |
-
photo.set_tags(tags)
|
| 150 |
-
# photo.set_embedding(embedding.tolist())
|
| 151 |
-
|
| 152 |
-
# Save to database
|
| 153 |
session.add(photo)
|
| 154 |
session.commit()
|
| 155 |
session.refresh(photo)
|
| 156 |
-
|
| 157 |
-
#
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
return UploadResponse(
|
| 162 |
id=photo.id,
|
| 163 |
filename=saved_filename,
|
| 164 |
-
image_url=
|
| 165 |
-
|
| 166 |
-
caption=caption,
|
| 167 |
-
message=f"Photo uploaded successfully with ID {photo.id}"
|
| 168 |
)
|
|
|
|
| 62 |
"""Check if file has valid image extension"""
|
| 63 |
return Path(filename).suffix.lower() in ALLOWED_EXTENSIONS
|
| 64 |
|
| 65 |
+
def process_image_in_background(photo_id: int, filepath: str, image_url: str):
    """
    Background task that enriches an already-saved photo with AI metadata.

    Steps:
    - Describe the image (tags, caption, description)
    - Generate an embedding from that description
    - Update the photo's database record
    - Index the embedding in FAISS

    Args:
        photo_id: ID of the Photo row to update.
        filepath: Path of the saved file. Not used by this function; kept so
            callers that pass it by keyword keep working.
        image_url: URL handed to the image describer.

    Any exception is caught, printed together with its traceback, and not
    re-raised.
    """
    from cloudzy.database import SessionLocal
    from sqlmodel import select

    try:
        describer = ImageDescriber()
        print(f"[Background] Processing image {photo_id}...")
        result = describer.describe_image(image_url)

        tags = result.get("tags", [])
        caption = result.get("caption", "")
        description = result.get("description", "")

        generator = ImageEmbeddingGenerator()
        embedding = generator.generate_embedding(tags, description, caption)

        # Use a fresh session for the background task
        session = SessionLocal()
        try:
            photo = session.exec(select(Photo).where(Photo.id == photo_id)).first()
            if photo is None:
                print(f"[Background] Photo {photo_id} not found in database")
                # Stop here: indexing the embedding would leave a FAISS entry
                # whose ID has no matching database row.
                return

            photo.caption = caption
            photo.set_tags(tags)
            photo.set_embedding(embedding.tolist())
            session.add(photo)
            session.commit()
            print(f"[Background] Photo {photo_id} updated with embedding")
        finally:
            # Runs on success, on the early return above, and on errors.
            session.close()

        # Index in FAISS (only reached when the database row was updated)
        search_engine = SearchEngine()
        search_engine.add_embedding(photo_id, embedding)
        print(f"[Background] Photo {photo_id} indexed in FAISS")

    except Exception as e:
        print(f"[Background Task] Error processing image {photo_id}: {e}")
        import traceback
        traceback.print_exc()
|
| 113 |
+
|
| 114 |
|
| 115 |
@router.post("/upload", response_model=UploadResponse)
|
| 116 |
async def upload_photo(
|
|
|
|
| 118 |
session: Session = Depends(get_session),
|
| 119 |
background_tasks: BackgroundTasks = None,
|
| 120 |
):
|
| 121 |
+
# --- Validate and save file ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
if not file.filename:
|
| 123 |
raise HTTPException(status_code=400, detail="No filename provided")
|
| 124 |
|
|
|
|
| 128 |
detail=f"Invalid file type. Allowed: {', '.join(ALLOWED_EXTENSIONS)}"
|
| 129 |
)
|
| 130 |
|
|
|
|
| 131 |
content = await file.read()
|
| 132 |
if not content:
|
| 133 |
raise HTTPException(status_code=400, detail="Empty file")
|
| 134 |
|
|
|
|
| 135 |
saved_filename = save_uploaded_file(content, file.filename)
|
| 136 |
filepath = f"uploads/{saved_filename}"
|
| 137 |
|
|
|
|
|
|
|
|
|
|
| 138 |
try:
|
| 139 |
uploader = ImgBBUploader(expiration=600)
|
| 140 |
image_url = uploader.upload(filepath)
|
| 141 |
except Exception as e:
|
| 142 |
raise HTTPException(status_code=500, detail=f"Image upload failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
APP_DOMAIN = os.getenv("APP_DOMAIN")
|
| 145 |
+
image_local_url = f"{APP_DOMAIN}uploads/{saved_filename}"
|
| 146 |
|
| 147 |
+
# --- Save photo immediately with empty caption/tags ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
photo = Photo(
|
| 149 |
filename=saved_filename,
|
| 150 |
filepath=filepath,
|
| 151 |
+
caption="", # empty for now
|
| 152 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
session.add(photo)
|
| 154 |
session.commit()
|
| 155 |
session.refresh(photo)
|
| 156 |
+
|
| 157 |
+
# --- Schedule background task ---
|
| 158 |
+
if background_tasks:
|
| 159 |
+
background_tasks.add_task(
|
| 160 |
+
process_image_in_background,
|
| 161 |
+
photo_id=photo.id,
|
| 162 |
+
filepath=filepath,
|
| 163 |
+
image_url=image_url
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
return UploadResponse(
|
| 167 |
id=photo.id,
|
| 168 |
filename=saved_filename,
|
| 169 |
+
image_url=image_local_url,
|
| 170 |
+
message=f"Photo uploaded successfully with ID {photo.id}. AI processing is running in the background."
|
|
|
|
|
|
|
| 171 |
)
|
cloudzy/schemas.py
CHANGED
|
@@ -8,6 +8,7 @@ class PhotoResponse(BaseModel):
|
|
| 8 |
"""Response model for photo metadata"""
|
| 9 |
id: int
|
| 10 |
filename: str
|
|
|
|
| 11 |
tags: List[str]
|
| 12 |
caption: str
|
| 13 |
created_at: datetime
|
|
@@ -21,6 +22,7 @@ class PhotoDetailResponse(PhotoResponse):
|
|
| 21 |
embedding: Optional[List[float]] = None
|
| 22 |
|
| 23 |
|
|
|
|
| 24 |
class SearchResult(BaseModel):
|
| 25 |
"""Search result with similarity score"""
|
| 26 |
photo_id: int
|
|
@@ -46,6 +48,21 @@ class UploadResponse(BaseModel):
|
|
| 46 |
id: int
|
| 47 |
filename: str
|
| 48 |
image_url: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
tags: List[str]
|
| 50 |
caption: str
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""Response model for photo metadata"""
|
| 9 |
id: int
|
| 10 |
filename: str
|
| 11 |
+
image_url: str
|
| 12 |
tags: List[str]
|
| 13 |
caption: str
|
| 14 |
created_at: datetime
|
|
|
|
| 22 |
embedding: Optional[List[float]] = None
|
| 23 |
|
| 24 |
|
| 25 |
+
|
| 26 |
class SearchResult(BaseModel):
|
| 27 |
"""Search result with similarity score"""
|
| 28 |
photo_id: int
|
|
|
|
| 48 |
id: int
|
| 49 |
filename: str
|
| 50 |
image_url: str
|
| 51 |
+
# tags: List[str]
|
| 52 |
+
# caption: str
|
| 53 |
+
message: str
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class PhotoItem(BaseModel):
    # One photo entry inside an album (see AlbumItem.album).
    photo_id: int
    filename: str
    image_url: str
    tags: List[str]
    caption: str
    # NOTE(review): presumably the similarity-search distance for this
    # photo; confirm against the route that builds these items.
    distance: float
|
| 63 |
+
|
| 64 |
+
class AlbumItem(BaseModel):
    # A group of photos together with a text summary of the group.
    album_summary: str
    album: List[PhotoItem]
|
| 67 |
+
|
| 68 |
+
AlbumsResponse = List[AlbumItem]
|
cloudzy/search_engine.py
CHANGED
|
@@ -19,6 +19,72 @@ class SearchEngine:
|
|
| 19 |
base_index = faiss.IndexFlatL2(dim)
|
| 20 |
self.index = faiss.IndexIDMap(base_index)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def add_embedding(self, photo_id: int, embedding: np.ndarray) -> None:
|
| 23 |
"""
|
| 24 |
Add an embedding to the index.
|
|
|
|
| 19 |
base_index = faiss.IndexFlatL2(dim)
|
| 20 |
self.index = faiss.IndexIDMap(base_index)
|
| 21 |
|
| 22 |
+
def create_albums(self, top_k: int = 5, distance_threshold: float = 0.3) -> List[List[int]]:
    """
    Group similar images into albums (clusters).

    Walks every photo ID stored in the FAISS index. For each ID that is not
    already part of an album, its embedding is loaded from the database and
    the index is searched for its top_k nearest neighbours. Neighbours that
    are unvisited and within the distance threshold form one album and are
    marked as visited so they are not placed in a second album.

    Args:
        top_k: Number of similar images to find for each album
        distance_threshold: Maximum distance to consider photos as similar (default 0.3)

    Returns:
        List of albums, each album is a list of photo_ids. Empty when the
        index holds no embeddings.
    """
    self.load()
    if self.index.ntotal == 0:
        return []

    # Project imports are only needed once there is something to cluster.
    from cloudzy.database import SessionLocal
    from cloudzy.models import Photo
    from sqlmodel import select

    # Get all photo IDs from FAISS index
    id_map = self.index.id_map
    all_ids = [int(id_map.at(i)) for i in range(id_map.size())]

    visited = set()
    albums = []

    # One session for the whole pass instead of one per photo.
    session = SessionLocal()
    try:
        for photo_id in all_ids:
            # Skip if already in an album
            if photo_id in visited:
                continue

            # Get embedding from database
            photo = session.exec(select(Photo).where(Photo.id == photo_id)).first()
            if not photo:
                continue

            embedding = photo.get_embedding()
            if not embedding:
                continue

            # Search for similar images
            query_embedding = np.array(embedding).reshape(1, -1).astype(np.float32)
            distances, ids = self.index.search(query_embedding, top_k)

            # NOTE(review): the index appears to be built on IndexFlatL2, which
            # reports squared L2 distances; confirm distance_threshold is
            # meant in that unit.
            album = []
            for pid, distance in zip(ids[0], distances[0]):
                # Normalise numpy integers so `visited` and the returned
                # albums hold plain Python ints only.
                pid = int(pid)
                # FAISS pads missing neighbours with -1.
                if pid != -1 and pid not in visited and distance <= distance_threshold:
                    album.append(pid)
                    visited.add(pid)

            # Add album if it has at least 1 photo
            if album:
                albums.append(album)
    finally:
        session.close()

    return albums
|
| 87 |
+
|
| 88 |
def add_embedding(self, photo_id: int, embedding: np.ndarray) -> None:
|
| 89 |
"""
|
| 90 |
Add an embedding to the index.
|
cloudzy/utils/file_upload_service.py
CHANGED
|
@@ -51,7 +51,6 @@ class ImgBBUploader:
|
|
| 51 |
)
|
| 52 |
resp.raise_for_status()
|
| 53 |
data = resp.json()
|
| 54 |
-
print(data)
|
| 55 |
if data.get("success"):
|
| 56 |
return data["data"]["url"]
|
| 57 |
raise RuntimeError(f"Upload failed: {data}")
|
|
|
|
| 51 |
)
|
| 52 |
resp.raise_for_status()
|
| 53 |
data = resp.json()
|
|
|
|
| 54 |
if data.get("success"):
|
| 55 |
return data["data"]["url"]
|
| 56 |
raise RuntimeError(f"Upload failed: {data}")
|