mahmoudsaber0 committed on
Commit
2debab9
·
verified ·
1 Parent(s): d23c0fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -9
app.py CHANGED
@@ -362,16 +362,17 @@ def split_content_in_half(text: str) -> tuple:
362
  return first_half, second_half
363
 
364
 
365
- def analyze_content_halves(model_manager, text: str) -> Dict:
366
  """
367
  Analyze text by splitting it into two halves after cleaning
368
 
369
  Args:
370
  model_manager: The ModelManager instance
371
  text: Original text to analyze
 
372
 
373
  Returns:
374
- Dictionary with analysis of both halves
375
  """
376
  try:
377
  # Clean the content first
@@ -394,6 +395,104 @@ def analyze_content_halves(model_manager, text: str) -> Dict:
394
  second_half_result = model_manager.classify_text(second_half)
395
  second_half_words = len(second_half.split())
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  return {
398
  "halves_analysis_available": True,
399
  "cleaned_content": {
@@ -402,19 +501,20 @@ def analyze_content_halves(model_manager, text: str) -> Dict:
402
  "second_half_words": second_half_words
403
  },
404
  "first_half": {
405
- "ai_percentage": first_half_result["ai_percentage"],
406
  "human_percentage": first_half_result["human_percentage"],
407
- "predicted_model": first_half_result["predicted_model"],
408
  "word_count": first_half_words,
409
  "preview": first_half[:200] + "..." if len(first_half) > 200 else first_half
410
  },
411
  "second_half": {
412
- "ai_percentage": second_half_result["ai_percentage"],
413
  "human_percentage": second_half_result["human_percentage"],
414
- "predicted_model": second_half_result["predicted_model"],
415
  "word_count": second_half_words,
416
  "preview": second_half[:200] + "..." if len(second_half) > 200 else second_half
417
- }
 
418
  }
419
 
420
  except Exception as e:
@@ -589,8 +689,8 @@ async def analyze_text(data: TextInput):
589
  human_percentage = round(100 - ai_percentage, 2)
590
  ai_words = int(recalc_ai_words)
591
 
592
- # 🆕 NEW FEATURE: Analyze content by halves
593
- halves_analysis = analyze_content_halves(model_manager, text)
594
 
595
  # إنشاء رسالة التغذية الراجعة
596
  if ai_percentage > 50:
 
362
  return first_half, second_half
363
 
364
 
365
+ def analyze_content_halves(model_manager, text: str, overall_result: Dict = None) -> Dict:
366
  """
367
  Analyze text by splitting it into two halves after cleaning
368
 
369
  Args:
370
  model_manager: The ModelManager instance
371
  text: Original text to analyze
372
+ overall_result: Overall classification result for variance calculation
373
 
374
  Returns:
375
+ Dictionary with analysis of both halves and final decision
376
  """
377
  try:
378
  # Clean the content first
 
395
  second_half_result = model_manager.classify_text(second_half)
396
  second_half_words = len(second_half.split())
397
 
398
+ # Extract key metrics
399
+ first_ai = first_half_result["ai_percentage"]
400
+ second_ai = second_half_result["ai_percentage"]
401
+ first_model = first_half_result["predicted_model"]
402
+ second_model = second_half_result["predicted_model"]
403
+
404
+ # Calculate average AI score from both halves
405
+ avg_halves_ai_score = (first_ai + second_ai) / 2
406
+
407
+ # Calculate variance between halves
408
+ variance_between_halves = abs(first_ai - second_ai)
409
+
410
+ # Overall AI probability (use overall_result if provided, otherwise calculate)
411
+ overall_ai_prob = overall_result["ai_percentage"] / 100 if overall_result else avg_halves_ai_score / 100
412
+
413
+ # ===== FINAL DECISION LOGIC =====
414
+ verdict = None
415
+ confidence = None
416
+ reasoning = None
417
+
418
+ # Condition 1: Both halves < 50% AI AND second_half predicted_model is "human"
419
+ if first_ai < 50 and second_ai < 50 and second_model.lower() == "human":
420
+ verdict = "HUMAN"
421
+ confidence = "High" if variance_between_halves < 15 else "Medium"
422
+ reasoning = (
423
+ f"Both halves scored below 50% AI probability (First: {first_ai}%, Second: {second_ai}%). "
424
+ f"The second half was classified as human-written. "
425
+ f"Variance between halves is {variance_between_halves:.2f}%, indicating "
426
+ f"{'consistent human patterns' if variance_between_halves < 15 else 'some variation but still human-like'}."
427
+ )
428
+
429
+ # Condition 2: Both halves > 50% AI AND second_half predicted_model is NOT "human"
430
+ elif first_ai > 50 and second_ai > 50 and second_model.lower() != "human":
431
+ verdict = "AI"
432
+
433
+ # Determine confidence based on scores
434
+ if first_ai > 80 and second_ai > 80:
435
+ confidence = "Very High"
436
+ elif first_ai > 70 and second_ai > 70:
437
+ confidence = "High"
438
+ else:
439
+ confidence = "Medium"
440
+
441
+ reasoning = (
442
+ f"Both halves scored above 50% AI probability (First: {first_ai}%, Second: {second_ai}%). "
443
+ f"The pattern matches {second_model} outputs. "
444
+ f"Variance between halves is {variance_between_halves:.2f}%, "
445
+ f"{'showing consistent AI patterns throughout' if variance_between_halves < 20 else 'with some variation in AI generation style'}."
446
+ )
447
+
448
+ # Condition 3: Mixed results - one half AI, one half human
449
+ elif (first_ai > 50 and second_ai < 50) or (first_ai < 50 and second_ai > 50):
450
+ verdict = "MIXED"
451
+ confidence = "Low"
452
+ reasoning = (
453
+ f"Mixed signals detected. First half: {first_ai}% AI, Second half: {second_ai}% AI. "
454
+ f"One portion appears AI-generated while the other seems human-written. "
455
+ f"This could indicate: partial AI assistance, human editing of AI content, "
456
+ f"or AI completion of human-started text. High variance of {variance_between_halves:.2f}% supports mixed authorship."
457
+ )
458
+
459
+ # Condition 4: Both around 50% - uncertain
460
+ else:
461
+ # Check if second_model is human but scores are borderline
462
+ if second_model.lower() == "human":
463
+ verdict = "LIKELY_HUMAN"
464
+ confidence = "Low"
465
+ reasoning = (
466
+ f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%). "
467
+ f"Second half classified as human-written. The text shows characteristics of both "
468
+ f"human and AI writing. Variance: {variance_between_halves:.2f}%."
469
+ )
470
+ else:
471
+ verdict = "LIKELY_AI"
472
+ confidence = "Low"
473
+ reasoning = (
474
+ f"Borderline case with scores near 50% threshold (First: {first_ai}%, Second: {second_ai}%). "
475
+ f"Pattern suggests {second_model} but confidence is low. "
476
+ f"Variance: {variance_between_halves:.2f}%."
477
+ )
478
+
479
+ # Prepare final decision structure
480
+ final_decision = {
481
+ "verdict": verdict,
482
+ "confidence": confidence,
483
+ "reasoning": reasoning,
484
+ "supporting_data": {
485
+ "overall_ai_prob": round(overall_ai_prob, 3),
486
+ "first_half_ai_score": round(first_ai / 100, 3),
487
+ "second_half_ai_score": round(second_ai / 100, 3),
488
+ "avg_halves_ai_score": round(avg_halves_ai_score / 100, 3),
489
+ "variance_between_halves": round(variance_between_halves, 2),
490
+ "first_half_model": first_model,
491
+ "second_half_model": second_model,
492
+ "models_agree": first_model == second_model
493
+ }
494
+ }
495
+
496
  return {
497
  "halves_analysis_available": True,
498
  "cleaned_content": {
 
501
  "second_half_words": second_half_words
502
  },
503
  "first_half": {
504
+ "ai_percentage": first_ai,
505
  "human_percentage": first_half_result["human_percentage"],
506
+ "predicted_model": first_model,
507
  "word_count": first_half_words,
508
  "preview": first_half[:200] + "..." if len(first_half) > 200 else first_half
509
  },
510
  "second_half": {
511
+ "ai_percentage": second_ai,
512
  "human_percentage": second_half_result["human_percentage"],
513
+ "predicted_model": second_model,
514
  "word_count": second_half_words,
515
  "preview": second_half[:200] + "..." if len(second_half) > 200 else second_half
516
+ },
517
+ "final_decision": final_decision
518
  }
519
 
520
  except Exception as e:
 
689
  human_percentage = round(100 - ai_percentage, 2)
690
  ai_words = int(recalc_ai_words)
691
 
692
+ # 🆕 NEW FEATURE: Analyze content by halves (pass overall result for variance calculation)
693
+ halves_analysis = analyze_content_halves(model_manager, text, result)
694
 
695
  # إنشاء رسالة التغذية الراجعة
696
  if ai_percentage > 50: