Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,288 +14,45 @@ from apscheduler.executors.pool import ThreadPoolExecutor
|
|
| 14 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 15 |
|
| 16 |
|
| 17 |
-
DISCORD_TOKEN = os.
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# real = os.getenv('SLACK_CHANNEL_ID_HF')
|
| 21 |
-
# test = 'C07B4KNU5BQ'
|
| 22 |
-
SLACK_CHANNEL_ID = os.getenv('SLACK_CHANNEL_ID_HF')
|
| 23 |
-
SLACK_CHANNEL_ID_TEST = 'C07B4KNU5BQ'
|
| 24 |
-
# 1259415803879751700 = test forum
|
| 25 |
-
# 1019883044724822016 = ask for help
|
| 26 |
-
ASK_FOR_HELP_CHANNEL_ID = 1019883044724822016
|
| 27 |
-
GRADIO_CHANNEL_ID = 1025174734427656283
|
| 28 |
-
ARGILLA_HELP_CHANNEL_ID = 1253640751481356330
|
| 29 |
-
DATA_DISCUSSIONS_CHANNEL_ID = 1217179426002047076
|
| 30 |
-
GIVE_HF_FEEDBACK_CHANNEL_ID = 897391062975385640
|
| 31 |
-
|
| 32 |
-
# Keyword routing table for the daily Slack digest.
# Written as (Slack mentions, keyword tuples) groups, in the same key order
# as before. A keyword tuple is a match when every string in it occurs in the
# lowercased Discord message, so ("dataset", "feedback") needs both words.
_TRIGGER_GROUPS = (
    # adam
    (["<@U051DB2754M>"], [("discord bot",)]),
    # abhishek
    (
        ["<@U01E3LEC2N7>"],
        [("autotrain",), ("auto train",), ("competition",), ("competitions",)],
    ),
    # VB
    (
        ["<@U039C2GANMV>"],
        [
            ("text to speech",),
            ("tts",),
            ("asr",),
            ("musicgen",),
            ("whisper",),
            ("speech recognition",),
            ("bark",),
        ],
    ),
    # tom aarsen
    (
        ["<@U04E4DNPWG7>"],
        [
            ("sentence-transformers",),
            ("sentence_transformers",),
            ("setfit",),
            ("sentence transformers",),
        ],
    ),
    # david berenstein, natalia elvira, sara han diaz lorenzo, Gabriel Martín Blázquez
    # NOTE(review): five IDs but only four names were listed - confirm the fifth.
    (
        ["<@U076B8C7G3E>", "<@U0766H30T7F>", "<@U076MF65WEM>", "<@U0765RENPNZ>", "<@U0768QEN0LA>"],
        [("argilla",)],
    ),
    # david berenstein, sara han diaz lorenzo, Gabriel Martín Blázquez, Agustín Piqueres
    # NOTE(review): five IDs but only four names were listed - confirm the fifth.
    (
        ["<@U076B8C7G3E>", "<@U076MF65WEM>", "<@U0765RENPNZ>", "<@U0768QEN0LA>", "<@U076271MBUN>"],
        [("distilabel",)],
    ),
    # steven liu
    (["<@U02DATT4C5B>"], [("docs",), ("documentation",)]),
    # abubakar abid, yuvraj sharma
    (["<@U02NMK75F1V>", "<@U04FLGQ26PQ>"], [("gradio",)]),
    # ben burtenshaw
    (["<@U0768RCHCRY>"], [("dataset", "feedback")]),
    # ann huang ("git " keeps its trailing space on purpose, as in the original key)
    (
        ["<@U07F1NP5U0K>"],
        [("git ",), ("lfs",), ("xet",), ("upload",), ("download",), ("stream",)],
    ),
)

# Flatten the groups into {keyword tuple: [slack mentions]}; every key gets its
# own list object, exactly like the hand-written literal did.
TRIGGERS = {
    keywords: list(mentions)
    for mentions, keyword_sets in _TRIGGER_GROUPS
    for keywords in keyword_sets
}
|
| 62 |
-
|
| 63 |
-
daily_pings = []
|
| 64 |
-
|
| 65 |
-
intents = discord.Intents.all()
|
| 66 |
-
intents.messages = True
|
| 67 |
bot = commands.Bot(command_prefix='!', intents=intents)
|
| 68 |
|
| 69 |
-
slack_client = WebClient(token=SLACK_BOT_TOKEN)
|
| 70 |
-
|
| 71 |
-
thread_mapping = {}
|
| 72 |
|
|
|
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
@bot.event
|
| 76 |
async def on_ready():
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
if message.author == bot.user:
|
| 82 |
-
return
|
| 83 |
-
|
| 84 |
-
# notification bot
|
| 85 |
-
print("on_message")
|
| 86 |
-
|
| 87 |
-
huggingfolks_role = discord.utils.get(message.guild.roles, id=897376942817419265)
|
| 88 |
-
bots_role = discord.utils.get(message.guild.roles, id=1258328471609016341)
|
| 89 |
-
if huggingfolks_role not in message.author.roles: # no need for ping if we're already discussing
|
| 90 |
-
if bots_role not in message.author.roles: # bots shouldn't trigger pings for this
|
| 91 |
-
print(" not bot ")
|
| 92 |
-
content = message.content.lower()
|
| 93 |
-
|
| 94 |
-
for trigger, mentions in TRIGGERS.items():
|
| 95 |
-
if all(word in content for word in trigger):
|
| 96 |
-
adjacent_words = extract_adjacent_words(message.content, trigger)
|
| 97 |
-
for slack_mention in mentions:
|
| 98 |
-
daily_pings.append({
|
| 99 |
-
'author': str(message.author),
|
| 100 |
-
'content': adjacent_words,
|
| 101 |
-
'channel': message.channel.name,
|
| 102 |
-
'url': message.jump_url,
|
| 103 |
-
'mention': slack_mention,
|
| 104 |
-
'trigger': trigger
|
| 105 |
-
})
|
| 106 |
-
print(f"daily pings:{daily_pings}")
|
| 107 |
-
|
| 108 |
-
# Check if the message is in a thread
|
| 109 |
-
if isinstance(message.channel, discord.Thread):
|
| 110 |
-
discord_thread_id = message.channel.id
|
| 111 |
-
# Check if there's an existing Slack thread for this Discord thread
|
| 112 |
-
# (the only Slack threads created should be for forum channel threads, not just any thread)
|
| 113 |
-
if discord_thread_id in thread_mapping:
|
| 114 |
-
slack_thread_ts = thread_mapping[discord_thread_id]
|
| 115 |
-
# post to slack only if thread already exists
|
| 116 |
-
post_to_slack_forum_version(message, SLACK_CHANNEL_ID, message.content, message.author, thread_ts=slack_thread_ts)
|
| 117 |
-
|
| 118 |
-
if message.channel.id == GIVE_HF_FEEDBACK_CHANNEL_ID:
|
| 119 |
-
post_to_slack_general(message, SLACK_CHANNEL_ID)
|
| 120 |
-
|
| 121 |
-
await bot.process_commands(message)
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
def post_to_slack_general(message, channel):
    """Mirror a `#give-hf-feedback` Discord message into a Slack channel.

    The Slack text is the author and message content, followed by one
    "Attachment: <url>" line per Discord attachment.

    Args:
        message: Discord message; ``author``, ``content`` and ``attachments``
            are read from it.
        channel: Slack channel ID to post into.

    Returns:
        The ``ts`` of the created Slack message, or ``None`` when Slack
        rejects the call with a ``SlackApiError`` (the error is printed).
    """
    pieces = [f"New post in `#give-hf-feedback` by {message.author}: {message.content}"]
    # One extra line per attachment; a falsy attachments value adds nothing.
    pieces.extend(f"\nAttachment: {item.url}" for item in (message.attachments or ()))
    body = "".join(pieces)

    try:
        posted = slack_client.chat_postMessage(channel=channel, text=body)
        return posted['ts']
    except SlackApiError as e:
        print(f"Error posting to Slack: {e.response['error']}")
        return None
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
def extract_adjacent_words(content, trigger):
    """Return a short excerpt of *content* around the *trigger* keywords.

    The excerpt is up to five whitespace-separated words before the match,
    the matched text itself (original casing), and up to five words after,
    wrapped in ``...``.

    The trigger words are first looked for back to back (only optional
    whitespace between them). Multi-word triggers are selected by the caller
    when each word merely occurs somewhere in the message, so if they are not
    adjacent the excerpt falls back to the first trigger word that can be
    found on its own instead of returning nothing.

    Args:
        content: Message text to search (matching is case-insensitive).
        trigger: Tuple of keyword strings.

    Returns:
        The excerpt string, or ``None`` when none of the trigger words occur
        in *content* at all.
    """
    # Preferred: all trigger words adjacent, in order.
    adjacent = r'\s*\b'.join(map(re.escape, trigger))
    match = re.search(adjacent, content, re.IGNORECASE)

    if match is None:
        # Fallback for non-adjacent multi-word triggers: anchor the excerpt
        # on the first trigger word that appears by itself.
        for word in trigger:
            match = re.search(re.escape(word), content, re.IGNORECASE)
            if match is not None:
                break

    if match is None:
        return None

    start, end = match.span()
    before = content[:start].split()[-5:]
    after = content[end:].split()[:5]
    snippet = '...' + ' '.join(before + [match.group()] + after) + '...'
    print("--------------------------------------------------------------")
    print(snippet)
    return snippet
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
@bot.event
async def on_thread_create(thread):
    """Announce a new forum thread on Slack and remember the Slack thread.

    Only threads whose parent is a ``discord.ForumChannel`` with one of the
    tracked forum IDs are announced. When the Slack post yields a timestamp,
    it is stored in ``thread_mapping`` under the Discord thread ID.
    """
    forum = thread.parent
    # (discord) must be the child thread of the CORRECT forum channel(s),
    # not just any thread, or any forum channel
    if not isinstance(forum, discord.ForumChannel):
        return
    tracked_forums = {
        ASK_FOR_HELP_CHANNEL_ID,
        GRADIO_CHANNEL_ID,
        ARGILLA_HELP_CHANNEL_ID,
        DATA_DISCUSSIONS_CHANNEL_ID,
    }
    if forum.id not in tracked_forums:
        return

    announcement = (
        f"New forum thread started in {thread.parent.name} by {thread.owner}: *{thread.name}*\n"
        f"{thread.jump_url}"
    )
    slack_ts = post_to_slack_create_thread(SLACK_CHANNEL_ID, announcement)
    if slack_ts:
        thread_mapping[thread.id] = slack_ts
|
| 168 |
-
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
for attachment in message.attachments:
|
| 173 |
-
attachment_url = attachment.url
|
| 174 |
-
text += f"\nAttachment: {attachment_url}"
|
| 175 |
-
text = f"{author}" + ": " + text
|
| 176 |
try:
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
)
|
| 197 |
-
return response['ts'] # Return the Slack message timestamp (thread ID)
|
| 198 |
-
except SlackApiError as e:
|
| 199 |
-
print(f"Error posting to Slack: {e.response['error']}")
|
| 200 |
-
return None
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
@bot.command()
async def list_tags(ctx, forum_channel_id: int):
    """Reply with the names and IDs of the tags available on a forum channel.

    Does nothing unless the invoking user has the hard-coded author ID and
    the given channel ID resolves to a ``discord.ForumChannel``.
    """
    if ctx.author.id != 811235357663297546:
        return
    channel = bot.get_channel(forum_channel_id)
    if not isinstance(channel, discord.ForumChannel):
        return

    described = ", ".join(f"{tag.name} (ID: {tag.id})" for tag in channel.available_tags)
    await ctx.send(f'Available tags: {described}')
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
# react with ✅ on slack if marked with solved tag on discord
|
| 214 |
-
SOLVED_TAG_IDS = {1026743978026094664, 1025179659215847575, 1263095032328753174, 1253641354312155208}
|
| 215 |
-
@bot.event
async def on_thread_update(before, after):
    """Mirror the Discord "solved" tag as a check-mark reaction on Slack.

    Applies only to threads in the tracked forum channels that already have
    an entry in ``thread_mapping``. A newly applied tag from
    ``SOLVED_TAG_IDS`` adds ``white_check_mark`` to the mapped Slack message;
    a removed one takes the reaction away again.
    """
    forum = after.parent
    if not isinstance(forum, discord.ForumChannel):
        return
    if forum.id not in {ASK_FOR_HELP_CHANNEL_ID, GRADIO_CHANNEL_ID, ARGILLA_HELP_CHANNEL_ID, DATA_DISCUSSIONS_CHANNEL_ID}:
        return

    tags_before = {tag.id for tag in before.applied_tags}
    tags_after = {tag.id for tag in after.applied_tags}

    if after.id not in thread_mapping:
        return
    slack_ts = thread_mapping[after.id]

    # Set intersection: "did any solved tag appear / disappear?"
    if (tags_after - tags_before) & SOLVED_TAG_IDS:
        react_to_slack_message(slack_ts, 'white_check_mark')

    if (tags_before - tags_after) & SOLVED_TAG_IDS:
        unreact_to_slack_message(slack_ts, 'white_check_mark')
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
def react_to_slack_message(thread_ts, emoji):
    """Add the *emoji* reaction to a message in ``SLACK_CHANNEL_ID``.

    Args:
        thread_ts: Slack timestamp identifying the message.
        emoji: Reaction name without colons (e.g. ``white_check_mark``).

    A ``SlackApiError`` is printed and swallowed; nothing is returned.
    """
    reaction = {"channel": SLACK_CHANNEL_ID, "name": emoji, "timestamp": thread_ts}
    try:
        slack_client.reactions_add(**reaction)
    except SlackApiError as e:
        print(f"Error reacting to Slack message: {e.response['error']}")
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
def unreact_to_slack_message(thread_ts, emoji):
    """Remove the *emoji* reaction from a message in ``SLACK_CHANNEL_ID``.

    Args:
        thread_ts: Slack timestamp identifying the message.
        emoji: Reaction name without colons (e.g. ``white_check_mark``).

    A ``SlackApiError`` is printed and swallowed; nothing is returned.
    """
    reaction = {"channel": SLACK_CHANNEL_ID, "name": emoji, "timestamp": thread_ts}
    try:
        slack_client.reactions_remove(**reaction)
    except SlackApiError as e:
        print(f"Error removing reaction from Slack message: {e.response['error']}")
|
| 256 |
-
|
| 257 |
-
#----------------------------------------------------------------------------------------------
|
| 258 |
-
|
| 259 |
-
def send_daily_pings():
    """Post the queued keyword pings to Slack, one thread per mentioned person.

    For every distinct ``mention`` in the queue a header message is posted to
    ``SLACK_CHANNEL_ID`` and each of that person's pings is added as a reply
    in the header's thread. Calls are spaced two seconds apart for Slack's
    rate limits.

    The queue is swapped for a fresh list *before* sending. Sending takes at
    least two seconds per message, and pings appended to ``daily_pings`` in
    the meantime used to be wiped by the reset at the end; now they simply
    wait in the new list for the next run.

    Slack failures are printed rather than raised, so one failing call cannot
    abort the run and cause already-posted pings to be sent again. When a
    header cannot be posted, that person's pings are put back on the queue
    for the next run.
    """
    global daily_pings
    if not daily_pings:
        return

    # Take ownership of the current batch; new pings go to the fresh list.
    pending, daily_pings = daily_pings, []
    print(f"sending daily pings...{pending}")

    # group pings by who they are meant to notify
    pings_by_mention = {}
    for ping in pending:
        pings_by_mention.setdefault(ping['mention'], []).append(ping)

    # send each group of pings in a separate thread
    for mention, pings in pings_by_mention.items():
        try:
            main_message = slack_client.chat_postMessage(
                channel=SLACK_CHANNEL_ID,
                text=f"DAILY PINGS FOR {mention} ON {datetime.now().strftime('%d/%m/%Y')}",
                unfurl_links=False,
                unfurl_media=False
            )
        except SlackApiError as e:
            print(f"Error posting daily pings header to Slack: {e.response['error']}")
            # Nothing was posted for this person yet, so retrying is safe.
            daily_pings.extend(pings)
            time.sleep(2)  # https://api.slack.com/apis/rate-limits
            continue
        time.sleep(2)  # https://api.slack.com/apis/rate-limits
        main_ts = main_message['ts']

        for ping in pings:
            try:
                slack_client.chat_postMessage(
                    channel=SLACK_CHANNEL_ID,
                    text=f"(for the keyword -> '{ping['trigger']}')\nFrom {ping['author']} in channel #{ping['channel']}: {ping['content']}\n{ping['url']}",
                    thread_ts=main_ts,
                    unfurl_links=False,
                    unfurl_media=False
                )
            except SlackApiError as e:
                # Not re-queued: the header already exists and a retry would
                # start a second thread for the same day.
                print(f"Error posting daily ping to Slack: {e.response['error']}")
            time.sleep(2)  # https://api.slack.com/apis/rate-limits
|
| 293 |
-
|
| 294 |
-
# pings -------------------------------------------------------------------------------------------
|
| 295 |
-
executor = ThreadPoolExecutor(max_workers=1)
|
| 296 |
-
scheduler = BackgroundScheduler(executors={'default': executor})
|
| 297 |
-
scheduler.add_job(send_daily_pings, trigger='interval', days=1)
|
| 298 |
-
scheduler.start()
|
| 299 |
|
| 300 |
|
| 301 |
# runs discord bot in thread = helps avoid blocking calls
|
|
|
|
| 14 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 15 |
|
| 16 |
|
| 17 |
+
DISCORD_TOKEN = os.environ.get("DISCORD_TOKEN", None)
|
| 18 |
+
intents = discord.Intents.all()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
bot = commands.Bot(command_prefix='!', intents=intents)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
global_df = pd.DataFrame()
|
| 23 |
+
test_merge = pd.read_csv("https://docs.google.com/spreadsheets/d/1C8aLqgCqLYcMiIFf-P_Aosaa03C_WLIB_UyqvjSdWg8/export?format=csv&gid=0")
|
| 24 |
|
| 25 |
|
| 26 |
@bot.event
async def on_ready():
    """Publish the verification sheet as ``global_df`` and start the role loop.

    import data from google sheets -> HF Space df (doesn't make API call this
    way, as it's read-only). The sheet itself is downloaded once at import
    time into ``test_merge``; this handler only rebinds ``global_df`` to it.
    """
    # The description above used to sit after the `global` statement, where
    # Python treats it as a discarded string expression, not a docstring.
    global global_df
    global_df = test_merge
    print(f"csv successfully retrieved: \n {global_df}")

    # `give_verified_roles` is a tasks.loop and does nothing until started.
    # on_ready can fire again after a reconnect, and starting a running loop
    # raises, hence the guard.
    # NOTE(review): no other start() call is visible in this part of the file
    # - confirm the loop is not meant to be launched elsewhere.
    if not give_verified_roles.is_running():
        give_verified_roles.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
@tasks.loop(seconds=10)
async def give_verified_roles():
    """Give the verified role to every sheet row that has a Hugging Face name.

    Every pass walks ``global_df``. A row counts when ``hf_user_name`` is set
    and is not ``n/a``; its ``discord_user_id`` is looked up in the guild and,
    if that member lacks the role, the role is added, the admin account is
    told by DM and the member receives the org invite by DM.

    Each row is handled in isolation: a malformed cell or a Discord error is
    printed and the pass moves on. Previously the first bad row raised into
    the single outer handler on every pass, so rows after it were never
    reached. The outer catch-all stays so that the loop itself never stops
    on an unexpected error.
    """
    try:
        guild = bot.get_guild(879548962464493619)
        if guild is None:
            # Guild not in the cache (yet); try again on the next pass.
            print("Error: guild 879548962464493619 not available")
            return
        role = guild.get_role(900063512829755413)
        if role is None:
            print("Error: role 900063512829755413 not found")
            return

        org_link = "https://huggingface.co/organizations/discord-community/share/wPKRAHYbAlaEaCxUxcqVyaaaeZcYagDvqc"
        invite_message = "Click to join our community org on the HF Hub!"

        # Requesting the full member list every 10 seconds is wasteful; do it
        # only while the member cache is still incomplete.
        if not guild.chunked:
            await guild.chunk()

        for index, row in global_df.iterrows():
            try:
                hf_user_name = row['hf_user_name']
                # str(): the cell may not be a string, depending on how pandas
                # parsed the column.
                if pd.isna(hf_user_name) or str(hf_user_name).lower() == 'n/a':
                    continue

                # presumably IDs are stored with an 'L' suffix so the sheet
                # keeps them as text - TODO confirm against the sheet
                discord_id = str(row['discord_user_id']).strip().strip('L')
                member = guild.get_member(int(discord_id))
                if not member:
                    continue
                if role in member.roles:
                    continue

                await member.add_roles(role)

                # Both DMs are best-effort: the role is already granted, and a
                # failed admin notice must not prevent the member's invite.
                notices = (
                    (bot.get_user(811235357663297546), f"Verified role given to {member}!"),
                    (member, f"Verification successful! [{member} <---> {row['discord_user_name']}] \n🤗 {org_link} {invite_message}"),
                )
                for recipient, text in notices:
                    if recipient is None:
                        continue
                    try:
                        await recipient.send(text)
                    except discord.HTTPException as e:
                        print(f"Error: could not DM {recipient}: {e}")
            except Exception as e:
                print(f"Error: row {index}: {e}")
    except Exception as e:
        print(f"Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
# runs discord bot in thread = helps avoid blocking calls
|