mahmoudsaber0 committed on
Commit
4edb764
·
verified ·
1 Parent(s): 290af78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -441
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import logging
2
  import gc
3
  import sys
@@ -7,11 +10,6 @@ from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
  from typing import Dict, List, Optional
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
-
11
-
12
-
13
-
14
-
15
  from tokenizers.normalizers import Sequence, Replace, Strip
16
  from tokenizers import Regex
17
  from huggingface_hub import hf_hub_download # Added for reliable HF downloads
@@ -46,13 +44,21 @@ os.environ.setdefault('USER', 'dockeruser')
46
 
47
  # =====================================================
48
  # 🔧 تكوين البيئة والإعدادات
 
 
 
 
 
 
 
 
49
  CACHE_DIR = "/tmp/huggingface_cache"
50
  os.makedirs(CACHE_DIR, exist_ok=True)
51
 
52
- # تكوين متغيرات البيئة لـ Hugging Face (removed TRANSFORMERS_CACHE to avoid deprecation warning)
53
  os.environ.update({
54
  "HF_HOME": CACHE_DIR,
55
-
56
  "HF_DATASETS_CACHE": CACHE_DIR,
57
  "HUGGINGFACE_HUB_CACHE": CACHE_DIR,
58
  "TORCH_HOME": CACHE_DIR,
@@ -61,156 +67,44 @@ os.environ.update({
61
  })
62
 
63
  # إعدادات PyTorch للذاكرة
64
- }
 
 
65
 
66
  # =====================================================
67
- # 🤖 Model Manager - إدارة الموديلات
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
-
80
-
81
-
82
-
83
-
84
-
85
-
86
-
87
-
88
-
89
-
90
-
91
-
92
-
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
-
101
-
102
-
103
-
104
-
105
-
106
-
107
-
108
-
109
-
110
-
111
-
112
-
113
-
114
-
115
-
116
-
117
-
118
-
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
-
128
-
129
-
130
-
131
-
132
-
133
-
134
-
135
-
136
-
137
-
138
-
139
-
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
156
-
157
-
158
-
159
-
160
-
161
-
162
-
163
-
164
-
165
-
166
-
167
-
168
-
169
-
170
-
171
-
172
-
173
-
174
-
175
-
176
-
177
-
178
-
179
-
180
-
181
-
182
-
183
-
184
-
185
-
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
-
194
-
195
-
196
-
197
-
198
-
199
-
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
 
 
202
  # =====================================================
203
  class ModelManager:
204
  def __init__(self):
205
  self.tokenizer = None
206
  self.models = []
207
-
208
-
209
  self.models_loaded = False
210
  self.model_urls = [
211
-
212
-
213
-
214
  "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12",
215
  "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
216
  ]
@@ -220,25 +114,6 @@ class ModelManager:
220
 
221
  def load_tokenizer(self):
222
  """تحميل الـ Tokenizer مع fallback"""
223
-
224
-
225
-
226
-
227
-
228
-
229
-
230
-
231
-
232
-
233
-
234
-
235
-
236
-
237
-
238
-
239
-
240
-
241
-
242
  try:
243
  logger.info(f"📝 Loading tokenizer from {self.base_model_id}...")
244
  self.tokenizer = AutoTokenizer.from_pretrained(
@@ -291,7 +166,7 @@ class ModelManager:
291
  self.base_model_id,
292
  num_labels=41,
293
  cache_dir=CACHE_DIR,
294
- dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # Updated from torch_dtype
295
  low_cpu_mem_usage=True,
296
  trust_remote_code=False
297
  )
@@ -305,7 +180,7 @@ class ModelManager:
305
  self.fallback_model_id,
306
  num_labels=41,
307
  cache_dir=CACHE_DIR,
308
- dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # Updated from torch_dtype
309
  low_cpu_mem_usage=True,
310
  trust_remote_code=False
311
  )
@@ -331,58 +206,8 @@ class ModelManager:
331
  filename=filename,
332
  cache_dir=CACHE_DIR,
333
  local_dir_use_symlinks=False
334
-
335
-
336
-
337
-
338
-
339
-
340
-
341
-
342
-
343
-
344
-
345
-
346
-
347
-
348
-
349
-
350
-
351
-
352
-
353
-
354
-
355
-
356
-
357
-
358
-
359
-
360
-
361
-
362
-
363
-
364
-
365
-
366
-
367
-
368
-
369
-
370
  )
371
  state_dict = torch.load(pt_file, map_location=device, weights_only=True)
372
-
373
-
374
-
375
-
376
-
377
-
378
-
379
-
380
-
381
-
382
-
383
-
384
-
385
-
386
 
387
  # تحميل الأوزان فقط إذا لم نكن في وضع fallback (لأن ModernBERT weights قد لا تتوافق مع BERT القياسي)
388
  if not self.using_fallback:
@@ -410,7 +235,7 @@ class ModelManager:
410
  logger.info(f"✅ {model_name} loaded successfully (fallback: {self.using_fallback})")
411
  return model
412
 
413
- def load_models(self, max_models=3): # Increased default to 3 to load local + 2 URLs
414
  """تحميل الموديلات بحد أقصى للذاكرة"""
415
  if self.models_loaded:
416
  logger.info("✨ Models already loaded")
@@ -460,7 +285,6 @@ class ModelManager:
460
 
461
  # التحقق من نجاح التحميل
462
  if len(self.models) > 0:
463
-
464
  self.models_loaded = True
465
  logger.info(f"✅ Successfully loaded {len(self.models)} models (using fallback: {self.using_fallback})")
466
  return True
@@ -482,44 +306,12 @@ class ModelManager:
482
  max_len = 512 if not self.using_fallback else 512 # BERT max is 512
483
  try:
484
  inputs = self.tokenizer(
485
-
486
-
487
-
488
-
489
-
490
-
491
  cleaned_text,
492
  return_tensors="pt",
493
  truncation=True,
494
  max_length=max_len,
495
  padding=True
496
  ).to(device)
497
-
498
-
499
-
500
-
501
-
502
-
503
-
504
-
505
-
506
-
507
-
508
-
509
-
510
-
511
-
512
-
513
-
514
-
515
-
516
-
517
-
518
-
519
-
520
-
521
-
522
-
523
  except Exception as e:
524
  logger.error(f"Tokenization error: {e}")
525
  raise ValueError(f"Failed to tokenize text: {e}")
@@ -543,81 +335,12 @@ class ModelManager:
543
  # حساب المتوسط (Soft Voting)
544
  averaged_probs = torch.mean(torch.stack(all_probabilities), dim=0)
545
  probabilities = averaged_probs[0]
546
-
547
-
548
-
549
-
550
-
551
-
552
-
553
-
554
-
555
-
556
-
557
-
558
-
559
-
560
-
561
-
562
-
563
-
564
-
565
-
566
-
567
-
568
-
569
-
570
-
571
-
572
-
573
-
574
-
575
-
576
-
577
-
578
-
579
-
580
-
581
-
582
-
583
-
584
-
585
-
586
-
587
-
588
-
589
-
590
-
591
-
592
-
593
-
594
-
595
-
596
-
597
-
598
-
599
-
600
-
601
-
602
-
603
-
604
-
605
-
606
-
607
-
608
-
609
-
610
-
611
-
612
-
613
-
614
 
615
  # حساب نسب Human vs AI
616
  human_prob = probabilities[24].item()
617
  ai_probs = probabilities.clone()
618
  ai_probs[24] = 0 # إزالة احتمالية Human
619
  ai_total_prob = ai_probs.sum().item()
620
-
621
 
622
  # التطبيع
623
  total = human_prob + ai_total_prob
@@ -627,13 +350,10 @@ class ModelManager:
627
  else:
628
  human_percentage = 50
629
  ai_percentage = 50
630
-
631
 
632
  # تحديد الموديل الأكثر احتمالاً
633
  ai_model_idx = torch.argmax(ai_probs).item()
634
  predicted_model = label_mapping.get(ai_model_idx, "Unknown")
635
-
636
-
637
 
638
  # أعلى 5 تنبؤات
639
  top_5_probs, top_5_indices = torch.topk(probabilities, 5)
@@ -643,74 +363,6 @@ class ModelManager:
643
  "model": label_mapping.get(idx.item(), "Unknown"),
644
  "probability": round(prob.item() * 100, 2)
645
  })
646
-
647
-
648
-
649
-
650
-
651
-
652
-
653
-
654
-
655
-
656
-
657
-
658
-
659
-
660
-
661
-
662
-
663
-
664
-
665
-
666
-
667
-
668
-
669
-
670
-
671
-
672
-
673
-
674
-
675
-
676
-
677
-
678
-
679
-
680
-
681
-
682
-
683
-
684
-
685
-
686
-
687
-
688
-
689
-
690
-
691
-
692
-
693
-
694
-
695
-
696
-
697
-
698
-
699
-
700
-
701
-
702
-
703
-
704
-
705
-
706
-
707
-
708
-
709
-
710
-
711
-
712
-
713
-
714
 
715
  return {
716
  "human_percentage": round(human_percentage, 2),
@@ -724,18 +376,33 @@ class ModelManager:
724
 
725
  # =====================================================
726
  # 🧹 دوال التنظيف والمعالجة
 
 
 
 
 
 
 
 
 
 
 
 
 
727
  # 🌐 FastAPI Application
728
  # =====================================================
729
  app = FastAPI(
730
  title="ModernBERT AI Text Detector",
731
  description="كشف النصوص المكتوبة بواسطة الذكاء الاصطناعي",
732
- version="2.3.0" # Updated version with 3 models and deprecation fixes
733
  )
734
 
735
  # إضافة CORS للسماح بالاستخدام من المتصفح
736
  app.add_middleware(
737
  CORSMiddleware,
738
  allow_origins=["*"],
 
 
739
  allow_headers=["*"],
740
  )
741
 
@@ -744,11 +411,11 @@ model_manager = ModelManager()
744
 
745
  # =====================================================
746
  # 📝 نماذج البيانات (Pydantic Models)
 
747
  class TextInput(BaseModel):
748
  text: str
749
  analyze_paragraphs: Optional[bool] = False
750
 
751
-
752
  class SimpleTextInput(BaseModel):
753
  text: str
754
 
@@ -756,6 +423,12 @@ class DetectionResult(BaseModel):
756
  success: bool
757
  code: int
758
  message: str
 
 
 
 
 
 
759
  async def startup_event():
760
  """تحميل الموديلات عند بداية التشغيل"""
761
  logger.info("=" * 50)
@@ -768,16 +441,11 @@ async def startup_event():
768
  logger.info("=" * 50)
769
 
770
  # محاولة تحميل الموديلات
771
- max_models = int(os.environ.get("MAX_MODELS", "3")) # Updated default to 3
772
  success = model_manager.load_models(max_models=max_models)
773
-
774
-
775
-
776
-
777
-
778
 
779
  if success:
780
- logger.info(f"✅ Application ready! (Fallback mode: {model_manager.using_fallback})")
781
  else:
782
  logger.error("⚠️ Failed to load models - API will return errors")
783
  logger.info("💡 Tip: Ensure 'transformers>=4.45.0' and 'huggingface_hub' are installed. Run: pip install --upgrade transformers huggingface_hub")
@@ -785,34 +453,34 @@ async def startup_event():
785
  @app.get("/")
786
  async def root():
787
  """الصفحة الرئيسية"""
788
-
789
-
790
-
791
-
792
-
793
  return {
794
  "message": "ModernBERT AI Text Detector API",
795
  "status": "online" if model_manager.models_loaded else "initializing",
796
  "models_loaded": len(model_manager.models),
797
  "using_fallback": model_manager.using_fallback,
798
  "device": str(device),
799
-
800
-
801
-
802
-
803
-
804
-
805
-
806
-
807
  "endpoints": {
808
  "analyze": "/analyze",
809
  "simple": "/analyze-simple",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
810
 
811
  return {
812
  "status": "healthy" if model_manager.models_loaded else "unhealthy",
813
  "models_loaded": len(model_manager.models),
814
  "using_fallback": model_manager.using_fallback,
815
-
816
  "device": str(device),
817
  "cuda_available": torch.cuda.is_available(),
818
  "memory_info": memory_info
@@ -855,7 +523,6 @@ async def analyze_text(data: TextInput):
855
  # النتائج الأساسية
856
  ai_percentage = result["ai_percentage"]
857
  human_percentage = result["human_percentage"]
858
-
859
  ai_words = int(total_words * (ai_percentage / 100))
860
 
861
  # تحليل الفقرات إذا طُلب ذلك
@@ -878,38 +545,15 @@ async def analyze_text(data: TextInput):
878
  "ai_generated_score": para_result["ai_percentage"] / 100,
879
  "human_written_score": para_result["human_percentage"] / 100,
880
  "predicted_model": para_result["predicted_model"]
881
-
882
-
883
-
884
-
885
  })
886
  except Exception as e:
887
  logger.warning(f"Failed to analyze paragraph: {e}")
888
-
889
-
890
-
891
-
892
-
893
-
894
-
895
-
896
-
897
-
898
-
899
-
900
-
901
-
902
-
903
-
904
-
905
 
906
  # إعادة حساب النسب بناءً على الفقرات
907
  if recalc_total_words > 0:
908
  ai_percentage = round((recalc_ai_words / recalc_total_words) * 100, 2)
909
  human_percentage = round(100 - ai_percentage, 2)
910
  ai_words = int(recalc_ai_words)
911
-
912
-
913
 
914
  # إنشاء رسالة التغذية الراجعة
915
  if ai_percentage > 50:
@@ -944,6 +588,9 @@ async def analyze_text(data: TextInput):
944
  success=False,
945
  code=500,
946
  message=f"Analysis failed: {str(e)}",
 
 
 
947
  @app.post("/analyze-simple")
948
  async def analyze_simple(data: SimpleTextInput):
949
  """
@@ -951,6 +598,7 @@ async def analyze_simple(data: SimpleTextInput):
951
  """
952
  try:
953
  text = data.text.strip()
 
954
  raise HTTPException(status_code=400, detail="Empty text")
955
 
956
  if not model_manager.models_loaded:
@@ -958,7 +606,6 @@ async def analyze_simple(data: SimpleTextInput):
958
  raise HTTPException(status_code=503, detail="Models not available")
959
 
960
  result = model_manager.classify_text(text)
961
-
962
 
963
  return {
964
  "is_ai": result["ai_percentage"] > 50,
@@ -967,10 +614,17 @@ async def analyze_simple(data: SimpleTextInput):
967
  "detected_model": result["predicted_model"] if result["ai_percentage"] > 50 else None,
968
  "confidence": max(result["ai_percentage"], result["human_percentage"]),
969
  "using_fallback": result.get("using_fallback", False)
970
-
971
  }
972
 
973
  except HTTPException:
 
 
 
 
 
 
 
 
974
  if __name__ == "__main__":
975
  import uvicorn
976
 
@@ -989,7 +643,6 @@ if __name__ == "__main__":
989
  "main:app", # Assuming this file is named main.py
990
  host=host,
991
  port=port,
992
-
993
  workers=workers,
994
  reload=False # Set to True for dev
995
  )
 
1
+ import os
2
+ import re
3
+ import torch
4
  import logging
5
  import gc
6
  import sys
 
10
  from pydantic import BaseModel
11
  from typing import Dict, List, Optional
12
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
 
 
 
13
  from tokenizers.normalizers import Sequence, Replace, Strip
14
  from tokenizers import Regex
15
  from huggingface_hub import hf_hub_download # Added for reliable HF downloads
 
44
 
45
  # =====================================================
46
  # 🔧 تكوين البيئة والإعدادات
47
+ # =====================================================
48
+ logging.basicConfig(
49
+ level=logging.INFO,
50
+ format='%(asctime)s - %(levelname)s - %(message)s'
51
+ )
52
+ logger = logging.getLogger(__name__)
53
+
54
+ # إعدادات الذاكرة والكاش
55
  CACHE_DIR = "/tmp/huggingface_cache"
56
  os.makedirs(CACHE_DIR, exist_ok=True)
57
 
58
+ # تكوين متغيرات البيئة لـ Hugging Face
59
  os.environ.update({
60
  "HF_HOME": CACHE_DIR,
61
+ "TRANSFORMERS_CACHE": CACHE_DIR,
62
  "HF_DATASETS_CACHE": CACHE_DIR,
63
  "HUGGINGFACE_HUB_CACHE": CACHE_DIR,
64
  "TORCH_HOME": CACHE_DIR,
 
67
  })
68
 
69
  # إعدادات PyTorch للذاكرة
70
+ if torch.cuda.is_available():
71
+ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
72
+ torch.backends.cudnn.benchmark = True
73
 
74
  # =====================================================
75
+ # 🚀 تحديد الجهاز (GPU أو CPU)
76
+ # =====================================================
77
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
78
+ logger.info(f"🖥️ Using device: {device}")
79
+ if torch.cuda.is_available():
80
+ logger.info(f"🎮 CUDA Device: {torch.cuda.get_device_name(0)}")
81
+ logger.info(f"💾 CUDA Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ # =====================================================
84
+ # 📊 خريطة الموديلات
85
+ # =====================================================
86
# Maps a classifier output index (0-40) to the name of the text source it
# stands for. Index 24 is the only non-model entry ('human'); the other 40
# entries are generator model names.
label_mapping = {
    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
    39: 'text-davinci-002', 40: 'text-davinci-003'
}
98
 
99
+ # =====================================================
100
+ # 🤖 Model Manager - إدارة الموديلات
101
  # =====================================================
102
  class ModelManager:
103
  def __init__(self):
104
  self.tokenizer = None
105
  self.models = []
 
 
106
  self.models_loaded = False
107
  self.model_urls = [
 
 
 
108
  "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12",
109
  "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
110
  ]
 
114
 
115
  def load_tokenizer(self):
116
  """تحميل الـ Tokenizer مع fallback"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  try:
118
  logger.info(f"📝 Loading tokenizer from {self.base_model_id}...")
119
  self.tokenizer = AutoTokenizer.from_pretrained(
 
166
  self.base_model_id,
167
  num_labels=41,
168
  cache_dir=CACHE_DIR,
169
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
170
  low_cpu_mem_usage=True,
171
  trust_remote_code=False
172
  )
 
180
  self.fallback_model_id,
181
  num_labels=41,
182
  cache_dir=CACHE_DIR,
183
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
184
  low_cpu_mem_usage=True,
185
  trust_remote_code=False
186
  )
 
206
  filename=filename,
207
  cache_dir=CACHE_DIR,
208
  local_dir_use_symlinks=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  )
210
  state_dict = torch.load(pt_file, map_location=device, weights_only=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  # تحميل الأوزان فقط إذا لم نكن في وضع fallback (لأن ModernBERT weights قد لا تتوافق مع BERT القياسي)
213
  if not self.using_fallback:
 
235
  logger.info(f"✅ {model_name} loaded successfully (fallback: {self.using_fallback})")
236
  return model
237
 
238
+ def load_models(self, max_models=2):
239
  """تحميل الموديلات بحد أقصى للذاكرة"""
240
  if self.models_loaded:
241
  logger.info("✨ Models already loaded")
 
285
 
286
  # التحقق من نجاح التحميل
287
  if len(self.models) > 0:
 
288
  self.models_loaded = True
289
  logger.info(f"✅ Successfully loaded {len(self.models)} models (using fallback: {self.using_fallback})")
290
  return True
 
306
  max_len = 512 if not self.using_fallback else 512 # BERT max is 512
307
  try:
308
  inputs = self.tokenizer(
 
 
 
 
 
 
309
  cleaned_text,
310
  return_tensors="pt",
311
  truncation=True,
312
  max_length=max_len,
313
  padding=True
314
  ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  except Exception as e:
316
  logger.error(f"Tokenization error: {e}")
317
  raise ValueError(f"Failed to tokenize text: {e}")
 
335
  # حساب المتوسط (Soft Voting)
336
  averaged_probs = torch.mean(torch.stack(all_probabilities), dim=0)
337
  probabilities = averaged_probs[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  # حساب نسب Human vs AI
340
  human_prob = probabilities[24].item()
341
  ai_probs = probabilities.clone()
342
  ai_probs[24] = 0 # إزالة احتمالية Human
343
  ai_total_prob = ai_probs.sum().item()
 
344
 
345
  # التطبيع
346
  total = human_prob + ai_total_prob
 
350
  else:
351
  human_percentage = 50
352
  ai_percentage = 50
 
353
 
354
  # تحديد الموديل الأكثر احتمالاً
355
  ai_model_idx = torch.argmax(ai_probs).item()
356
  predicted_model = label_mapping.get(ai_model_idx, "Unknown")
 
 
357
 
358
  # أعلى 5 تنبؤات
359
  top_5_probs, top_5_indices = torch.topk(probabilities, 5)
 
363
  "model": label_mapping.get(idx.item(), "Unknown"),
364
  "probability": round(prob.item() * 100, 2)
365
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
  return {
368
  "human_percentage": round(human_percentage, 2),
 
376
 
377
  # =====================================================
378
  # 🧹 دوال التنظيف والمعالجة
379
+ # =====================================================
380
def clean_text(text: str) -> str:
    """Normalize whitespace in *text*.

    Three steps are applied in order:

    1. Every run of two or more whitespace characters is replaced by a
       single space. Newlines count as whitespace, so a blank-line
       paragraph break ("\\n\\n") also becomes one space; a lone newline
       is left untouched.
    2. Whitespace directly before one of ``, . ; : ? !`` is removed.
    3. Leading and trailing whitespace is stripped.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string.
    """
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'\s+([,.;:?!])', r'\1', text)
    return text.strip()
385
+
386
def split_into_paragraphs(text: str) -> List[str]:
    """Split *text* into paragraphs separated by blank lines.

    The input is stripped, then split wherever a newline is followed by
    optional whitespace and another newline. Each piece is stripped, and
    pieces that are empty after stripping are dropped.

    Args:
        text: The text to split.

    Returns:
        The non-empty paragraphs in their original order; an empty list
        when *text* is empty or whitespace-only.
    """
    paragraphs = re.split(r'\n\s*\n', text.strip())
    return [p.strip() for p in paragraphs if p.strip()]
390
+
391
+ # =====================================================
392
  # 🌐 FastAPI Application
393
  # =====================================================
394
  app = FastAPI(
395
  title="ModernBERT AI Text Detector",
396
  description="كشف النصوص المكتوبة بواسطة الذكاء الاصطناعي",
397
+ version="2.2.0" # Updated version with UID fix
398
  )
399
 
400
  # إضافة CORS للسماح بالاستخدام من المتصفح
401
  app.add_middleware(
402
  CORSMiddleware,
403
  allow_origins=["*"],
404
+ allow_credentials=True,
405
+ allow_methods=["*"],
406
  allow_headers=["*"],
407
  )
408
 
 
411
 
412
  # =====================================================
413
  # 📝 نماذج البيانات (Pydantic Models)
414
+ # =====================================================
415
  class TextInput(BaseModel):
416
  text: str
417
  analyze_paragraphs: Optional[bool] = False
418
 
 
419
  class SimpleTextInput(BaseModel):
420
  text: str
421
 
 
423
  success: bool
424
  code: int
425
  message: str
426
+ data: Dict
427
+
428
+ # =====================================================
429
+ # 🎯 API Endpoints
430
+ # =====================================================
431
+ @app.on_event("startup")
432
  async def startup_event():
433
  """تحميل الموديلات عند بداية التشغيل"""
434
  logger.info("=" * 50)
 
441
  logger.info("=" * 50)
442
 
443
  # محاولة تحميل الموديلات
444
+ max_models = int(os.environ.get("MAX_MODELS", "2"))
445
  success = model_manager.load_models(max_models=max_models)
 
 
 
 
 
446
 
447
  if success:
448
+ logger.info("✅ Application ready! (Fallback mode: %s)", model_manager.using_fallback)
449
  else:
450
  logger.error("⚠️ Failed to load models - API will return errors")
451
  logger.info("💡 Tip: Ensure 'transformers>=4.45.0' and 'huggingface_hub' are installed. Run: pip install --upgrade transformers huggingface_hub")
 
453
  @app.get("/")
454
  async def root():
455
  """الصفحة الرئيسية"""
 
 
 
 
 
456
  return {
457
  "message": "ModernBERT AI Text Detector API",
458
  "status": "online" if model_manager.models_loaded else "initializing",
459
  "models_loaded": len(model_manager.models),
460
  "using_fallback": model_manager.using_fallback,
461
  "device": str(device),
 
 
 
 
 
 
 
 
462
  "endpoints": {
463
  "analyze": "/analyze",
464
  "simple": "/analyze-simple",
465
+ "health": "/health",
466
+ "docs": "/docs"
467
+ }
468
+ }
469
+
470
+ @app.get("/health")
471
+ async def health_check():
472
+ """فحص صحة الخدمة"""
473
+ memory_info = {}
474
+ if torch.cuda.is_available():
475
+ memory_info = {
476
+ "gpu_allocated_gb": round(torch.cuda.memory_allocated() / 1024**3, 2),
477
+ "gpu_reserved_gb": round(torch.cuda.memory_reserved() / 1024**3, 2)
478
+ }
479
 
480
  return {
481
  "status": "healthy" if model_manager.models_loaded else "unhealthy",
482
  "models_loaded": len(model_manager.models),
483
  "using_fallback": model_manager.using_fallback,
 
484
  "device": str(device),
485
  "cuda_available": torch.cuda.is_available(),
486
  "memory_info": memory_info
 
523
  # النتائج الأساسية
524
  ai_percentage = result["ai_percentage"]
525
  human_percentage = result["human_percentage"]
 
526
  ai_words = int(total_words * (ai_percentage / 100))
527
 
528
  # تحليل الفقرات إذا طُلب ذلك
 
545
  "ai_generated_score": para_result["ai_percentage"] / 100,
546
  "human_written_score": para_result["human_percentage"] / 100,
547
  "predicted_model": para_result["predicted_model"]
 
 
 
 
548
  })
549
  except Exception as e:
550
  logger.warning(f"Failed to analyze paragraph: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
 
552
  # إعادة حساب النسب بناءً على الفقرات
553
  if recalc_total_words > 0:
554
  ai_percentage = round((recalc_ai_words / recalc_total_words) * 100, 2)
555
  human_percentage = round(100 - ai_percentage, 2)
556
  ai_words = int(recalc_ai_words)
 
 
557
 
558
  # إنشاء رسالة التغذية الراجعة
559
  if ai_percentage > 50:
 
588
  success=False,
589
  code=500,
590
  message=f"Analysis failed: {str(e)}",
591
+ data={}
592
+ )
593
+
594
  @app.post("/analyze-simple")
595
  async def analyze_simple(data: SimpleTextInput):
596
  """
 
598
  """
599
  try:
600
  text = data.text.strip()
601
+ if not text:
602
  raise HTTPException(status_code=400, detail="Empty text")
603
 
604
  if not model_manager.models_loaded:
 
606
  raise HTTPException(status_code=503, detail="Models not available")
607
 
608
  result = model_manager.classify_text(text)
 
609
 
610
  return {
611
  "is_ai": result["ai_percentage"] > 50,
 
614
  "detected_model": result["predicted_model"] if result["ai_percentage"] > 50 else None,
615
  "confidence": max(result["ai_percentage"], result["human_percentage"]),
616
  "using_fallback": result.get("using_fallback", False)
 
617
  }
618
 
619
  except HTTPException:
620
+ raise
621
+ except Exception as e:
622
+ logger.error(f"Simple analysis error: {e}")
623
+ raise HTTPException(status_code=500, detail=str(e))
624
+
625
+ # =====================================================
626
+ # 🏃 تشغيل التطبيق
627
+ # =====================================================
628
  if __name__ == "__main__":
629
  import uvicorn
630
 
 
643
  "main:app", # Assuming this file is named main.py
644
  host=host,
645
  port=port,
 
646
  workers=workers,
647
  reload=False # Set to True for dev
648
  )