Commit 98c5ea8
Parent(s): 16e2cb0

new weekly data using new pipeline

Files changed:
- data/all_trades_profitability.parquet  +2 -2
- data/fpmmTrades.parquet  +2 -2
- data/fpmms.parquet  +2 -2
- data/invalid_trades.parquet  +2 -2
- data/summary_profitability.parquet  +2 -2
- data/t_map.pkl  +2 -2
- data/tools.parquet  +2 -2
- data/tools_accuracy.csv  +2 -2
- scripts/get_mech_info.py  +54 -27
- scripts/profitability.py  +18 -4
- scripts/pull_data.py  +3 -3
- scripts/utils.py  +30 -0
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1df952a693ba00cc0b11bca8ff4c6805415f2d006b3cd47242e43e7cdc7d5fe1
+size 3266876

data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:390f96495582e69ae82225a61e6473c1fe6536081b326a6bd11617be45ce672a
+size 10816943

data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3b5dcd19c7922e3f7168a139b0d63c335c921343faa15852b6ae04888f7e006a
+size 504817

data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:30e0fa52d0c605961b5a12bec47bc3b0288b02b814c61cc7f8a33ad793f8bd30
+size 84013

data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a55a6c4c7ef5da8db27b61c268eccbd1d426c456a0d17efa4b22b7c69ed1454d
+size 78788

data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7b67cf178943b82b5286b7c2adb6329e1e23fffce807ebf299684746813f55de
+size 22992649

data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7e4ace8d172836c379ee23bde678f19d9eeec28e7bd31bf9e95dc914ac5c9bc5
+size 407088092

data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f7a3622338d1eb2f23824031733ecdd77ae77eff7cb2b1c879aba05b0966d2cc
+size 1133
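All eight data files are tracked with Git LFS, so the hunks above change only the three-line pointer files (spec version, content oid, byte size); the actual parquet/pickle payloads live out of band and are fetched by oid. A minimal sketch of how a downloaded payload could be checked against its pointer (the paths are hypothetical, not part of this commit):

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: Path, payload_path: Path) -> bool:
    # parse the "key value" lines of a Git LFS pointer file
    fields = dict(
        line.split(" ", 1)
        for line in pointer_path.read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].removeprefix("sha256:").strip()
    expected_size = int(fields["size"])
    data = payload_path.read_bytes()
    # the payload must match both the recorded size and the sha256 digest
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid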
scripts/get_mech_info.py CHANGED
@@ -120,7 +120,7 @@ def update_json_files():
     merge_json_files("tools_info.json", "new_tools_info.json")
 
 
-def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
     # Read old trades parquet file
     try:
         old_trades_df = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
@@ -128,58 +128,78 @@ def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
         print(f"Error reading old trades parquet file {e}")
         return None
 
+    try:
+        new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
+    except Exception as e:
+        print(f"Error reading new trades parquet file {e}")
+        return None
+
     # merge two dataframes
     merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+    # avoid numpy objects
+    merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
+        bool
+    )
+    merge_df["fpmm.isPendingArbitration"] = merge_df[
+        "fpmm.isPendingArbitration"
+    ].astype(bool)
 
     # Check for duplicates
-    duplicates
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
 
     # Remove duplicates
-
+    # fpmm.outcomes is a numpy array
+    merge_df = merge_df.drop_duplicates(
+        subset=[col for col in merge_df.columns if col != "fpmm.outcomes"]
+    )
+    print(f"Final length after removing duplicates= {len(merge_df)}")
 
     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
 
+    return
+
+
+def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+    # Read old all_trades parquet file
+    try:
+        old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+    except Exception as e:
+        print(f"Error reading old trades parquet file {e}")
+        return None
+    # merge two dataframes
+    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+
+    # Check for duplicates
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates= {len(merge_df)}")
     return merge_df
 
 
-def update_tools_parquet(
+def update_tools_parquet(new_tools_filename: str):
     try:
         old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
     except Exception as e:
         print(f"Error reading old tools parquet file {e}")
         return None
+    try:
+        new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
+    except Exception as e:
+        print(f"Error reading new tools parquet file {e}")
+        return None
 
     # merge two dataframes
     merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
 
     # Check for duplicates
-    duplicates
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
 
     # Remove duplicates
     merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates= {len(merge_df)}")
 
     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
@@ -290,6 +310,13 @@ def get_mech_events_since_last_run():
         int(latest_timestamp.timestamp()),
         int((latest_timestamp + five_seconds).timestamp()),
    )
+    # expecting only one block
+    last_run_block_number = last_run_block_number.get("number", "")
+    if last_run_block_number.isdigit():
+        last_run_block_number = int(last_run_block_number)
+
+    if last_run_block_number == "":
+        raise ValueError("Could not find a valid block number for last collected data")
     last_block_number = get_last_block_number()
 
     # mech requests
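Two details in update_fpmmTrades_parquet are worth noting. pandas drop_duplicates hashes cell values, so a column holding numpy arrays (fpmm.outcomes here) raises TypeError: unhashable type: 'numpy.ndarray'; the commit therefore dedupes on every column except that one, and casts the two arbitration flags to plain bool to avoid numpy scalar objects. A minimal sketch of the failure and the workaround, using made-up data rather than the real fpmmTrades schema:

import numpy as np
import pandas as pd

# hypothetical frame mimicking fpmmTrades: one column holds numpy arrays
df = pd.DataFrame(
    {
        "title": ["market A", "market A", "market B"],
        "fpmm.outcomes": [np.array(["Yes", "No"])] * 3,
    }
)

# df.drop_duplicates() would raise TypeError: unhashable type: 'numpy.ndarray'
deduped = df.drop_duplicates(
    subset=[col for col in df.columns if col != "fpmm.outcomes"]
)
print(f"{len(df)} rows -> {len(deduped)} rows")  # 3 rows -> 2 rows

The other notable change is the guard in get_mech_events_since_last_run: the block query returns a dict, so the code extracts "number", converts it to int only when it is all digits, and raises rather than collecting events from an undefined starting block.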
scripts/profitability.py CHANGED
@@ -31,10 +31,18 @@ import os
 from pathlib import Path
 from get_mech_info import (
     DATETIME_60_DAYS_AGO,
-
+    update_fpmmTrades_parquet,
     update_tools_parquet,
+    update_all_trades_parquet,
+)
+from utils import (
+    SUBGRAPH_API_KEY,
+    wei_to_unit,
+    convert_hex_to_int,
+    _to_content,
+    read_parquet_files,
+    JSON_DATA_DIR,
 )
-from utils import SUBGRAPH_API_KEY, wei_to_unit, convert_hex_to_int, _to_content
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking
 
@@ -567,13 +575,21 @@ def run_profitability_analysis(
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools)
 
-    #
+    # merge previous files if requested
     if merge:
-
+        update_fpmmTrades_parquet(trades_filename)
         update_tools_parquet(tools_filename)
+        all_trades_df = update_all_trades_parquet(all_trades_df)
 
     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
+    if merge:
+        try:
+            old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
+            merge_df = pd.concat([old_invalid_trades, invalid_trades], ignore_index=True)
+            invalid_trades = merge_df.drop_duplicates()
+        except Exception as e:
+            print(f"Error reading old invalid trades parquet file {e}")
     invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
 
     all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
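The merge branch is what makes the weekly run incremental: each update_*_parquet helper appends the newly pulled window onto the historical parquet file and dedupes, with update_all_trades_parquet returning the merged frame for further filtering instead of writing it directly. Distilled to its common shape (names here are illustrative, not from the commit):

import pandas as pd
from pathlib import Path

DATA_DIR = Path("data")  # same layout the scripts assume

def merge_parquet_snapshot(filename: str, new_df: pd.DataFrame) -> pd.DataFrame:
    # append the new weekly slice to the historical file, drop exact repeats,
    # and persist the result so next week's run starts from the merged state
    old_df = pd.read_parquet(DATA_DIR / filename)
    merged = pd.concat([old_df, new_df], ignore_index=True).drop_duplicates()
    merged.to_parquet(DATA_DIR / filename, index=False)
    return merged

The same append-dedupe step is applied inline to invalid_trades.parquet before the file is rewritten.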
scripts/pull_data.py CHANGED
@@ -141,9 +141,8 @@ def only_new_weekly_analysis():
 
     add_current_answer("new_tools.parquet")
 
-    # Run profitability analysis
+    # # Run profitability analysis
     logging.info("Running profitability analysis")
-
     run_profitability_analysis(
         rpc=rpc,
         tools_filename="new_tools.parquet",
@@ -213,7 +212,8 @@ def weekly_analysis():
 
 
 if __name__ == "__main__":
-
+    only_new_weekly_analysis()
+    # weekly_analysis()
     # rpc = RPC
     # updating_timestamps(rpc)
     # compute_tools_accuracy()
scripts/utils.py CHANGED
@@ -428,3 +428,33 @@ def _to_content(q: str) -> dict[str, Any]:
         "extensions": {"headers": None},
     }
     return finalized_query
+
+
+def read_parquet_files(tools_filename: str, trades_filename: str):
+    # Check if the tools file is in the data directory
+    try:
+        tools = pd.read_parquet(DATA_DIR / tools_filename)
+
+        # make sure trader_address is in the columns
+        assert "trader_address" in tools.columns, "trader_address column not found"
+
+        # lowercase and strip trader_address
+        tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
+
+        # drop duplicates
+        tools.drop_duplicates(inplace=True)
+
+        print(f"{tools_filename} loaded")
+    except FileNotFoundError:
+        print("tools.parquet not found. Please run tools.py first.")
+        return
+    try:
+        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
+        fpmmTrades["trader_address"] = (
+            fpmmTrades["trader_address"].str.lower().str.strip()
+        )
+    except FileNotFoundError:
+        print("fpmmTrades.parquet not found.")
+        return
+
+    return tools, fpmmTrades
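read_parquet_files centralizes the loading and normalization (lowercased, stripped trader_address) that profitability.py previously did inline, which is why it now appears in that file's utils import. A sketch of the intended call site; "new_tools.parquet" is used elsewhere in this commit, but the trades filename below is a guess following the same new_* naming and should be treated as hypothetical:

# hypothetical call, mirroring how run_profitability_analysis loads its inputs
tools, fpmmTrades = read_parquet_files("new_tools.parquet", "new_fpmmTrades.parquet")

Note that the helper implicitly returns None when either file is missing, so unpacking the result as above raises TypeError on failure; a caller that wants a soft failure should capture the return value and check it before unpacking.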