File size: 8,057 Bytes
f20194e
 
 
 
eed2f95
141e89b
 
f20194e
141e89b
eed2f95
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141e89b
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141e89b
f20194e
 
 
 
 
141e89b
f20194e
141e89b
f20194e
 
141e89b
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141e89b
 
f20194e
141e89b
f20194e
 
141e89b
f20194e
 
141e89b
f20194e
 
 
 
 
94f10f5
f20194e
 
 
 
 
 
 
 
 
 
141e89b
f20194e
141e89b
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141e89b
f20194e
 
 
 
 
 
 
 
94f10f5
141e89b
f20194e
 
 
 
 
 
 
94f10f5
 
f20194e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94f10f5
 
 
f20194e
 
 
 
141e89b
 
f20194e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python3
"""
ResearchMate - Main Application Entry Point
"""

import os
import sys
import logging
from pathlib import Path

# Set up environment variables before importing anything else
def setup_environment():
    """Configure environment variables and create writable runtime directories.

    Every application, cache and temp location is pointed at a directory
    under ``/tmp``. Existing values of these variables are overwritten
    unconditionally.

    Side effects:
        * sets the variables listed below in ``os.environ`` (this includes
          ``HOME`` and ``TMPDIR``);
        * puts ``/tmp/cache`` at the front of ``sys.path`` (only once, even
          if this function is called repeatedly);
        * creates every referenced directory, makes it world-writable and
          prints one status line per directory.

    Directory creation is best-effort: a failure is printed as a warning
    and never raised.
    """
    # Variables whose value is a directory that must exist and be writable.
    # This table is the single source of truth: the directories created
    # further down are derived from it, so the two can never drift apart.
    path_vars = {
        'DATA_DIR': '/tmp/researchmate/data',
        'LOGS_DIR': '/tmp/researchmate/logs',
        'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
        'UPLOADS_DIR': '/tmp/researchmate/uploads',
        'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
        'CONFIG_DIR': '/tmp/researchmate/config',
        'TEMP_DIR': '/tmp/researchmate/tmp',
        'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',  # Additional key

        # Cache directories
        'MPLCONFIGDIR': '/tmp/matplotlib',
        'TRANSFORMERS_CACHE': '/tmp/transformers',
        'HF_HOME': '/tmp/huggingface',
        'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
        'HF_DATASETS_CACHE': '/tmp/datasets',
        'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
        'XDG_CACHE_HOME': '/tmp/cache',

        # Additional variables to prevent /data access
        'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
        'TORCH_HOME': '/tmp/cache',
        'NLTK_DATA': '/tmp/cache/nltk_data',

        # Override any hardcoded paths
        'HOME': '/tmp/cache',
        'TMPDIR': '/tmp/researchmate/tmp',
    }

    # Plain switches (not paths).
    flag_vars = {
        'TOKENIZERS_PARALLELISM': 'false',

        # HF Spaces specific - prevent access to /data
        'HF_DATASETS_OFFLINE': '1',
        'HF_HUB_OFFLINE': '0',
    }

    # Force-set everything, overriding whatever the host environment provided.
    os.environ.update(path_vars)
    os.environ.update(flag_vars)

    # Also make the cache directory importable. Guarded so that calling this
    # function more than once does not stack duplicate sys.path entries.
    # NOTE(review): this directory is made world-writable below while being
    # first on sys.path - confirm that is acceptable for the deployment.
    if '/tmp/cache' not in sys.path:
        sys.path.insert(0, '/tmp/cache')

    # Unique directories in first-seen order (several variables share a path).
    directories = list(dict.fromkeys(path_vars.values()))

    for directory in directories:
        try:
            path = Path(directory)
            path.mkdir(parents=True, exist_ok=True)
            # Ensure write permissions
            path.chmod(0o777)
            print(f"✓ Created/verified directory: {directory}")
        except Exception as e:
            # Best-effort: keep going so the remaining directories are tried.
            print(f"⚠ Warning: Could not create directory {directory}: {e}")

# Run the environment setup now, at import time, so the variables are already
# in place when the third-party imports below (uvicorn, FastAPI) are loaded
setup_environment()

# Now import other modules
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse

# Configure logging early
log_file = os.path.join(os.environ.get('LOGS_DIR', '/tmp/researchmate/logs'), 'app.log')

# Console logging is always available. The file handler is best-effort:
# opening it raises OSError when the log directory is missing or not
# writable, and that must not abort the whole application at import time.
log_handlers = [logging.StreamHandler(sys.stdout)]
try:
    log_handlers.append(logging.FileHandler(log_file, mode='a'))
except OSError as exc:
    print(f"Warning: could not open log file {log_file}: {exc}")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)

logger = logging.getLogger(__name__)

def _load_settings():
    """Load the application settings; return None if loading fails.

    The import is done here rather than at module level so that it happens
    after the environment has been set up.
    """
    try:
        from src.settings import get_settings
        settings = get_settings()
        print("✓ Settings loaded successfully")
        print(f"Database directory: {settings.database.chroma_persist_dir}")
        return settings
    except Exception as e:
        print(f"⚠ Settings loading failed: {e}")
        # Continue with basic settings
        return None


def _init_research_mate():
    """Instantiate ResearchMate; on failure print the traceback and return None."""
    research_mate = None
    try:
        from src.components.research_assistant import ResearchMate
        research_mate = ResearchMate()
        print("✓ ResearchMate initialized successfully")
    except Exception as e:
        print(f"✗ Failed to initialize ResearchMate: {e}")
        import traceback
        traceback.print_exc()
        print("⚠ Server will start but ResearchMate features may not work")
    return research_mate


def _create_app(settings, research_mate):
    """Build the FastAPI app with CORS/GZip middleware and the basic endpoints.

    Args:
        settings: loaded settings object, or None to use the fallback CORS
            configuration.
        research_mate: initialized ResearchMate instance, or None when
            initialization failed (reported by the root endpoint).

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(
        title="ResearchMate",
        description="AI-powered research assistant",
        version="1.0.0"
    )

    if settings:
        app.add_middleware(
            CORSMiddleware,
            allow_origins=settings.security.cors_origins,
            allow_credentials=True,
            allow_methods=settings.security.cors_methods,
            allow_headers=settings.security.cors_headers,
        )
    else:
        # Basic CORS for HF Spaces. Credentials are disabled here because
        # wildcard origins/methods/headers must not be combined with
        # allow_credentials=True.
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=False,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    app.add_middleware(GZipMiddleware, minimum_size=1000)

    # Health check endpoint
    @app.get("/health")
    async def health_check():
        chroma_dir = os.environ.get('CHROMA_DIR')
        # Actually probe the directory instead of always reporting "OK".
        writable = bool(chroma_dir) and os.access(chroma_dir, os.W_OK)
        return JSONResponse({
            "status": "healthy",
            "version": "1.0.0",
            "chroma_dir": chroma_dir,
            "writable_test": "OK" if writable else "FAILED"
        })

    # Basic root endpoint
    @app.get("/")
    async def root():
        return JSONResponse({
            "message": "ResearchMate API",
            "status": "running",
            "research_mate_available": research_mate is not None
        })

    return app


def _mount_static(app, settings):
    """Mount the static files directory under /static if it exists (best-effort)."""
    try:
        static_dir = settings.get_static_dir() if settings else "src/static"

        if Path(static_dir).exists():
            app.mount("/static", StaticFiles(directory=static_dir), name="static")
            print(f"✓ Static files mounted from: {static_dir}")
    except Exception as e:
        logger.warning("Could not mount static files: %s", e)


def main():
    """Main application entry point.

    Loads settings and the ResearchMate component (both optional: a failure
    is reported and startup continues), builds the FastAPI app and runs it
    with uvicorn on HOST/PORT (defaults ``0.0.0.0``:``7860``).

    Any unexpected error during startup is logged with its traceback and
    the process exits with status 1.
    """
    try:
        print("===== ResearchMate Application Startup =====")
        print("Setting up environment...")

        # Double-check environment is properly set
        for name in ('CHROMA_DIR', 'UPLOADS_DIR', 'LOGS_DIR', 'HF_HOME'):
            print(f"{name}: {os.environ.get(name)}")

        # Import settings after environment setup
        settings = _load_settings()

        print("Starting ResearchMate background initialization...")

        # Initialize components with error handling
        research_mate = _init_research_mate()

        app = _create_app(settings, research_mate)
        _mount_static(app, settings)

        # No API routers to include (src.api.routes does not exist)
        # If you add API routers in the future, include them here.

        # For Hugging Face Spaces, use port 7860
        port = int(os.environ.get("PORT", 7860))
        host = os.environ.get("HOST", "0.0.0.0")

        print(f"🚀 Starting server on {host}:{port}")
        if settings:
            print(f"📁 Data directory: {settings.database.chroma_persist_dir}")
            print(f"📤 Upload directory: {settings.get_upload_dir()}")
            print(f"🔧 Config file: {settings.config_file}")

        # Start the server (blocks until shutdown)
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
            access_log=True
        )

    except Exception as e:
        # logger.exception records the traceback in the log handlers too,
        # not just on stderr.
        logger.exception("Failed to start application: %s", e)
        sys.exit(1)

# Start the server only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()