Spaces:

Riy777
/

Trad

Running

App Files Files Community

Riy777 committed on Oct 28

Commit

713e0f7

1 Parent(s): adacd38

Update ml_engine/monte_carlo.py

Browse files

Files changed (1) hide show

ml_engine/monte_carlo.py +194 -60

ml_engine/monte_carlo.py CHANGED Viewed

@@ -1,7 +1,16 @@
 # ml_engine/monte_carlo.py
 import numpy as np
-# 🔴 لا نحتاج scipy.stats إذا استخدمنا np.random.standard_t
-# from scipy.stats import t as student_t
 class MonteCarloAnalyzer:
     def __init__(self):
@@ -9,23 +18,21 @@ class MonteCarloAnalyzer:
     async def generate_1h_price_distribution(self, ohlcv_data, target_profit_percent=0.005):
         """
-        (مُرقّى - المرحلة 1)
-        محاكاة مونت كارلو متقدمة لتوليد توزيع سعري للساعة القادمة.
         - تستخدم توزيع Student-t (للذيول الثقيلة).
         - تستخدم نموذج Merton Jump-Diffusion (للقفزات السعرية).
-        - تُرجع توزيعاً كاملاً، فترات ثقة، ومقاييس مخاطرة.
         """
         try:
             # 1. التحقق من جودة البيانات
             if not ohlcv_data or '1h' not in ohlcv_data or len(ohlcv_data['1h']) < 30:
                 if '15m' in ohlcv_data and len(ohlcv_data['15m']) >= 50:
-                    # استخدام بيانات 15m كاحتياطي إذا كانت 1h غير كافية
                     closes = np.array([candle[4] for candle in ohlcv_data['15m']])
                 else:
                     self.simulation_results = {'error': 'Insufficient OHLCV data (< 30 candles 1h)'}
                     return None
             else:
-                 # دمج بيانات 1h و 15m (إن وجدت) لبيانات إحصائية أفضل
                 all_closes = [candle[4] for candle in ohlcv_data['1h']]
                 if '15m' in ohlcv_data and len(ohlcv_data['15m']) >= 16:
                     all_closes.extend([candle[4] for candle in ohlcv_data['15m'][-16:]])
@@ -48,102 +55,229 @@ class MonteCarloAnalyzer:
                 self.simulation_results = {'error': 'Insufficient log returns (< 20)'}
                 return None
             mean_return = np.mean(log_returns)
             std_return = np.std(log_returns)
-            # 3. إعداد باراميترات المحاكاة (المرحلة 1)
-            num_simulations = 5000       # عدد المسارات (5k كافية مع Student-t)
-            t_df = 10                    # درجات الحرية (DOF) لتوزيع Student-t (أقل = ذيول أثقل)
-            # باراميترات القفز (Merton Jump-Diffusion)
             jump_lambda = 0.05           # احتمالية حدوث قفزة في الساعة (5%)
-            jump_mean = 0.0              # متوسط حجم القفزة (متمركز حول الصفر)
-            jump_std = std_return * 3.0  # تقلب القفزة (3 أضعاف التقلب العادي)
-            # 4. تشغيل المحاكاة
-            # المكون الأول: الانجراف (Drift)
-            # (التعديل لـ Geometric Brownian Motion)
             drift = (mean_return - 0.5 * std_return**2)
-            # المكون الثاني: التقلب (Diffusion) - باستخدام Student-t
-            # (يولد أرقاماً عشوائية تتبع توزيع t)
             diffusion = std_return * np.random.standard_t(df=t_df, size=num_simulations)
-            # المكون الثالث: القفزات (Jumps)
             jump_mask = np.random.rand(num_simulations) < jump_lambda
             jump_sizes = np.random.normal(jump_mean, jump_std, size=num_simulations)
             jump_component = np.zeros(num_simulations)
             jump_component[jump_mask] = jump_sizes[jump_mask]
-            # تجميع العوائد المتوقعة
             simulated_log_returns = drift + diffusion + jump_component
-            # حساب الأسعار النهائية المتوقعة
             simulated_prices = current_price * np.exp(simulated_log_returns)
-            # 5. حساب المخرجات والتوزيع
-            # حساب التوزيع الإحصائي
             mean_price = np.mean(simulated_prices)
             median_price = np.median(simulated_prices)
-            # حساب فترات الثقة (Prediction Intervals)
             percentiles = np.percentile(simulated_prices, [2.5, 5, 25, 50, 75, 95, 97.5])
             pi_95 = [percentiles[0], percentiles[-1]]
             pi_90 = [percentiles[1], percentiles[-2]]
-            pi_50 = [percentiles[2], percentiles[4]] # يُعرف أيضاً بـ Interquartile Range (IQR)
-            # حساب مقاييس المخاطرة (VaR و CVaR)
-            # VaR @ 95%: ما هي أقصى خسارة متوقعة في 95% من السيناريوهات؟ (الخسارة عند النسبة المئوية 5)
             VaR_95_price = percentiles[1]
             VaR_95_value = current_price - VaR_95_price
-            # CVaR @ 95%: ما هو متوسط الخسارة إذا تجاوزنا عتبة VaR؟ (متوسط كل الأسعار الأقل من VaR_95_price)
             losses_beyond_var = simulated_prices[simulated_prices <= VaR_95_price]
-            CVaR_95_price = np.mean(losses_beyond_var) if len(losses_beyond_var) > 0 else VaR_95_price
-            CVaR_95_value = current_price - CVaR_95_price
-            # حساب الاحتمالية المطلوبة (للتوافق الجزئي مع الدرجات)
             target_price = current_price * (1 + target_profit_percent)
             probability_of_gain = np.mean(simulated_prices >= target_price)
-            # 6. تجميع النتائج
             self.simulation_results = {
                 'simulation_model': 'Phase1_Student-t_JumpDiffusion',
                 'num_simulations': num_simulations,
                 'current_price': current_price,
-                'distribution_summary': {
-                    'mean_price': mean_price,
-                    'median_price': median_price,
-                },
                 'prediction_interval_50': pi_50,
                 'prediction_interval_90': pi_90,
                 'prediction_interval_95': pi_95,
                 'risk_metrics': {
                     'VaR_95_price': VaR_95_price,
                     'VaR_95_value': VaR_95_value,
-                    'CVaR_95_price': CVaR_95_price,
                     'CVaR_95_value': CVaR_95_value,
                 },
-                'probability_of_gain': probability_of_gain, # (P >= 0.5% profit)
-                'raw_simulated_prices': simulated_prices[:100] # عينة صغيرة للتتبع
             }
             return self.simulation_results
         except Exception as e:
-            print(f"❌ خطأ فادح في محاكاة مونت كارلو المتقدمة: {e}")
-            self.simulation_results = {'error': f'Fatal simulation error: {e}'}
             return None
-    # 🔴 الدالة القديمة (محذوفة)
-    # async def predict_1h_probability(self, ohlcv_data): ...
-    # 🔴 دالة حساب الاتجاه (غير مستخدمة حالياً في المرحلة 1، لكنها قد تكون مفيدة لاحقاً)
     def _calculate_trend_adjustment(self, closes):
-        """حساب معامل تعديل الاتجاه"""
         try:
             if len(closes) < 10: return 1.0
             recent_trend = (closes[-1] - closes[-10]) / closes[-10]
@@ -154,4 +288,4 @@ class MonteCarloAnalyzer:
             else: return 1.0
         except Exception: return 1.0
-print("✅ ML Module: Advanced Monte Carlo Analyzer loaded (Phase 1: T-Dist + Jumps)")

 # ml_engine/monte_carlo.py
 import numpy as np
+import pandas as pd
+from arch import arch_model
+import lightgbm as lgb
+from sklearn.preprocessing import StandardScaler
+# نستورد مكتبات المؤشرات (نفترض وجودها في ملف المؤشرات أو نحسبها يدوياً)
+# لتبسيط الأمر هنا، سنستخدم pandas_ta إذا كانت متاحة، أو حسابات بسيطة
+try:
+    import pandas_ta as ta
+except ImportError:
+    print("⚠️ مكتبة pandas_ta غير موجودة، سيتم استخدام حسابات يدوية للمؤشرات.")
+    ta = None
 class MonteCarloAnalyzer:
     def __init__(self):
     async def generate_1h_price_distribution(self, ohlcv_data, target_profit_percent=0.005):
         """
+        (المرحلة 1 - سريعة)
+        محاكاة مونت كارلو لتوليد توزيع سعري للساعة القادمة (للفرز الأولي).
         - تستخدم توزيع Student-t (للذيول الثقيلة).
         - تستخدم نموذج Merton Jump-Diffusion (للقفزات السعرية).
+        - تستخدم المتوسط/الانحراف التاريخي البسيط.
         """
         try:
             # 1. التحقق من جودة البيانات
             if not ohlcv_data or '1h' not in ohlcv_data or len(ohlcv_data['1h']) < 30:
                 if '15m' in ohlcv_data and len(ohlcv_data['15m']) >= 50:
                     closes = np.array([candle[4] for candle in ohlcv_data['15m']])
                 else:
                     self.simulation_results = {'error': 'Insufficient OHLCV data (< 30 candles 1h)'}
                     return None
             else:
                 all_closes = [candle[4] for candle in ohlcv_data['1h']]
                 if '15m' in ohlcv_data and len(ohlcv_data['15m']) >= 16:
                     all_closes.extend([candle[4] for candle in ohlcv_data['15m'][-16:]])
                 self.simulation_results = {'error': 'Insufficient log returns (< 20)'}
                 return None
+            # 🔴 استخدام المتوسط والانحراف التاريخي (بسيط وسريع)
             mean_return = np.mean(log_returns)
             std_return = np.std(log_returns)
+            # 3. إعداد باراميترات المحاكاة
+            num_simulations = 5000       # عدد المسارات
+            t_df = 10                    # درجات الحرية (DOF) لتوزيع Student-t
             jump_lambda = 0.05           # احتمالية حدوث قفزة في الساعة (5%)
+            jump_mean = 0.0              # متوسط حجم القفزة
+            jump_std = std_return * 3.0  # تقلب القفزة
+            # 4. تشغيل المحاكاة (كما في الإصدار السابق)
             drift = (mean_return - 0.5 * std_return**2)
             diffusion = std_return * np.random.standard_t(df=t_df, size=num_simulations)
             jump_mask = np.random.rand(num_simulations) < jump_lambda
             jump_sizes = np.random.normal(jump_mean, jump_std, size=num_simulations)
             jump_component = np.zeros(num_simulations)
             jump_component[jump_mask] = jump_sizes[jump_mask]
             simulated_log_returns = drift + diffusion + jump_component
             simulated_prices = current_price * np.exp(simulated_log_returns)
+            # 5. حساب المخرجات والتوزيع (كما في الإصدار السابق)
             mean_price = np.mean(simulated_prices)
             median_price = np.median(simulated_prices)
             percentiles = np.percentile(simulated_prices, [2.5, 5, 25, 50, 75, 95, 97.5])
             pi_95 = [percentiles[0], percentiles[-1]]
             pi_90 = [percentiles[1], percentiles[-2]]
+            pi_50 = [percentiles[2], percentiles[4]]
             VaR_95_price = percentiles[1]
             VaR_95_value = current_price - VaR_95_price
             losses_beyond_var = simulated_prices[simulated_prices <= VaR_95_price]
+            CVR_95_price = np.mean(losses_beyond_var) if len(losses_beyond_var) > 0 else VaR_95_price
+            CVaR_95_value = current_price - CVR_95_price
             target_price = current_price * (1 + target_profit_percent)
             probability_of_gain = np.mean(simulated_prices >= target_price)
             self.simulation_results = {
                 'simulation_model': 'Phase1_Student-t_JumpDiffusion',
                 'num_simulations': num_simulations,
                 'current_price': current_price,
+                'distribution_summary': {'mean_price': mean_price, 'median_price': median_price},
                 'prediction_interval_50': pi_50,
                 'prediction_interval_90': pi_90,
                 'prediction_interval_95': pi_95,
                 'risk_metrics': {
                     'VaR_95_price': VaR_95_price,
                     'VaR_95_value': VaR_95_value,
+                    'CVaR_95_price': CVR_95_price,
                     'CVaR_95_value': CVaR_95_value,
                 },
+                'probability_of_gain': probability_of_gain,
+                'raw_simulated_prices': simulated_prices[:100]
             }
             return self.simulation_results
         except Exception as e:
+            self.simulation_results = {'error': f'Phase 1 MC Error: {e}'}
             return None
+    # 🔴 --- دالة جديدة --- 🔴
+    async def generate_1h_distribution_advanced(self, ohlcv_data, target_profit_percent=0.005):
+        """
+        (المرحلة 2+3 - متقدمة)
+        محاكاة مونت كارلو لتوليد توزيع سعري دقيق (لأفضل 10 مرشحين).
+        - تستخدم GARCH(1,1) لتوقع التقلب (Phase 2).
+        - تستخدم LightGBM لتوقع الميل/Drift (Phase 3).
+        - تستخدم Student-t و Jumps للمحاكاة.
+        """
+        try:
+            # 1. إعداد البيانات (DataFrame)
+            # نستخدم إطار 1h لأنه الأنسب لـ GARCH/LGBM لتوقع الساعة القادمة
+            if not ohlcv_data or '1h' not in ohlcv_data or len(ohlcv_data['1h']) < 50:
+                self.simulation_results = {'error': 'Advanced MC requires 1h data (>= 50 candles)'}
+                # كحل احتياطي، يمكننا العودة للنموذج البسيط إذا فشل المتقدم
+                return await self.generate_1h_price_distribution(ohlcv_data, target_profit_percent)
+            candles = ohlcv_data['1h']
+            df = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
+            df[['open', 'high', 'low', 'close', 'volume']] = df[['open', 'high', 'low', 'close', 'volume']].astype(float)
+            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
+            df.set_index('timestamp', inplace=True)
+            df.sort_index(inplace=True)
+            if df.empty or len(df) < 50:
+                raise ValueError("DataFrame creation failed or insufficient data after processing")
+            current_price = df['close'].iloc[-1]
+            # 2. حساب العوائد اللوغاريتمية (أساس كل الحسابات)
+            df['log_returns'] = np.log(df['close'] / df['close'].shift(1)).fillna(0)
+            log_returns_series = df['log_returns'].replace([np.inf, -np.inf], 0)
+            # 3. (Phase 2) توقع التقلب باستخدام GARCH(1,1)
+            try:
+                # نضرب العوائد في 100 لتساعد GARCH على الاستقرار (ممارسة شائعة)
+                garch_model = arch_model(log_returns_series * 100, vol='Garch', p=1, q=1, dist='t')
+                # 🔴 استخدام disp='off' لإيقاف الطباعة الكثيفة، كما طلبت
+                res = garch_model.fit(update_freq=0, disp='off')
+                forecast = res.forecast(horizon=1)
+                # أخذ التباين المتوقع (variance) وقسمته على 10000 (لأننا ضربنا في 100)
+                forecasted_var = forecast.variance.iloc[-1, 0] / 10000
+                forecasted_std_return = np.sqrt(forecasted_var)
+            except Exception as garch_err:
+                # في حال فشل GARCH (بيانات غير مستقرة)، نعود للانحراف المعياري العادي
+                forecasted_std_return = np.std(log_returns_series.iloc[-30:]) # انحراف آخر 30 شمعة
+                print(f"⚠️ GARCH failed, using std: {garch_err}")
+            # 4. (Phase 3) توقع الميل (Drift) باستخدام LightGBM
+            try:
+                # 4a. هندسة الميزات
+                if ta:
+                    df['rsi'] = ta.rsi(df['close'], length=14)
+                    macd = ta.macd(df['close'], fast=12, slow=26, signal=9)
+                    df['macd_hist'] = macd['MACDh_12_26_9']
+                else: # حسابات يدوية بسيطة
+                    delta = df['close'].diff()
+                    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
+                    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
+                    rs = gain / loss
+                    df['rsi'] = 100 - (100 / (1 + rs))
+                    df['macd_hist'] = df['close'].ewm(span=12).mean() - df['close'].ewm(span=26).mean()
+                df['lag_1'] = df['log_returns'].shift(1)
+                df['lag_2'] = df['log_returns'].shift(2)
+                features = ['rsi', 'macd_hist', 'lag_1', 'lag_2']
+                df.dropna(inplace=True)
+                if df.empty or len(df) < 20:
+                     raise ValueError("Insufficient data after feature engineering")
+                # 4b. إعداد بيانات التدريب والتنبؤ
+                df['target'] = df['log_returns'].shift(-1) # الهدف هو العائد *التالي*
+                df.dropna(inplace=True)
+                X = df[features]
+                y = df['target']
+                X_train, y_train = X.iloc[:-1], y.iloc[:-1] # كل البيانات ما عدا الأخيرة
+                X_predict = X.iloc[-1:] # آخر صف من الميزات للتنبؤ
+                # 4c. تدريب نموذج LGBM
+                lgbm_model = lgb.LGBMRegressor(n_estimators=100, learning_rate=0.1, n_jobs=1, verbose=-1)
+                lgbm_model.fit(X_train, y_train)
+                # 4d. التنبؤ بالميل
+                forecasted_mean_return = lgbm_model.predict(X_predict)[0]
+            except Exception as lgbm_err:
+                # في حال فشل LGBM، نعود للمتوسط العادي
+                forecasted_mean_return = np.mean(log_returns_series.iloc[-30:]) # متوسط آخر 30 شمعة
+                print(f"⚠️ LGBM failed, using mean: {lgbm_err}")
+            # 5. تشغيل المحاكاة بالقيم الديناميكية
+            # استخدام نفس الباراميترات (T-Dist, Jumps)
+            num_simulations = 5000
+            t_df = 10
+            jump_lambda = 0.05
+            jump_mean = 0.0
+            # تقلب القفزة يعتمد الآن على التقلب المتوقع من GARCH
+            jump_std = forecasted_std_return * 3.0
+            # 🔴 استخدام القيم المتوقعة
+            mean_return = forecasted_mean_return
+            std_return = forecasted_std_return
+            drift = (mean_return - 0.5 * std_return**2)
+            diffusion = std_return * np.random.standard_t(df=t_df, size=num_simulations)
+            jump_mask = np.random.rand(num_simulations) < jump_lambda
+            jump_sizes = np.random.normal(jump_mean, jump_std, size=num_simulations)
+            jump_component = np.zeros(num_simulations)
+            jump_component[jump_mask] = jump_sizes[jump_mask]
+            simulated_log_returns = drift + diffusion + jump_component
+            simulated_prices = current_price * np.exp(simulated_log_returns)
+            # 6. حساب المخرجات والتوزيع (نفس منطق المرحلة 1)
+            mean_price = np.mean(simulated_prices)
+            median_price = np.median(simulated_prices)
+            percentiles = np.percentile(simulated_prices, [2.5, 5, 25, 50, 75, 95, 97.5])
+            pi_95 = [percentiles[0], percentiles[-1]]
+            pi_90 = [percentiles[1], percentiles[-2]]
+            pi_50 = [percentiles[2], percentiles[4]]
+            VaR_95_price = percentiles[1]
+            VaR_95_value = current_price - VaR_95_price
+            losses_beyond_var = simulated_prices[simulated_prices <= VaR_95_price]
+            CVR_95_price = np.mean(losses_beyond_var) if len(losses_beyond_var) > 0 else VaR_95_price
+            CVaR_95_value = current_price - CVR_95_price
+            target_price = current_price * (1 + target_profit_percent)
+            probability_of_gain = np.mean(simulated_prices >= target_price)
+            self.simulation_results = {
+                'simulation_model': 'Phase2_GARCH_LGBM', # 🔴 تحديد النموذج المتقدم
+                'num_simulations': num_simulations,
+                'current_price': current_price,
+                'forecasted_drift_lgbm': forecasted_mean_return, # 🔴 إضافة للتتبع
+                'forecasted_vol_garch': forecasted_std_return, # 🔴 إضافة للتتبع
+                'distribution_summary': {'mean_price': mean_price, 'median_price': median_price},
+                'prediction_interval_50': pi_50,
+                'prediction_interval_90': pi_90,
+                'prediction_interval_95': pi_95,
+                'risk_metrics': {
+                    'VaR_95_price': VaR_95_price,
+                    'VaR_95_value': VaR_95_value,
+                    'CVaR_95_price': CVR_95_price,
+                    'CVaR_95_value': CVaR_95_value,
+                },
+                'probability_of_gain': probability_of_gain,
+                'raw_simulated_prices': simulated_prices[:100]
+            }
+            return self.simulation_results
+        except Exception as e:
+            print(f"❌ خطأ فادح في محاكاة مونت كارلو المتقدمة (GARCH/LGBM): {e}")
+            traceback.print_exc()
+            self.simulation_results = {'error': f'Advanced MC Error: {e}'}
+            # العودة إلى المرحلة 1 في حالة الفشل الفادح
+            return await self.generate_1h_price_distribution(ohlcv_data, target_profit_percent)
     def _calculate_trend_adjustment(self, closes):
+        """(غير مستخدمة حالياً)"""
         try:
             if len(closes) < 10: return 1.0
             recent_trend = (closes[-1] - closes[-10]) / closes[-10]
             else: return 1.0
         except Exception: return 1.0
+print("✅ ML Module: Advanced Monte Carlo Analyzer loaded (Phase 1 + Phase 2/3)")