BrightData_SerpAPI_LinkedIn_Profile_Scraping

Paused

App Files Files Community

ElegantSolutions committed on Jun 11

Commit

dbe16be

verified ·

1 Parent(s): 80c04b5

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -11

app.py CHANGED Viewed

@@ -66,17 +66,16 @@ def calculate_similarity(name1, name2):
 def fetch_linkedin_links(query, api_key, applicant_name):
-    """Fetches LinkedIn profile links using the correct BrightData API."""
     try:
         url = "https://api.brightdata.com/request"
-        google_search_url = f"https://www.google.com/search?q={query}"
         payload = {
-            "url": google_search_url,
-            "zone": "residential",     # Replace with your actual zone if needed
-            "format": "raw",
             "method": "GET",
-            "country": "us",           # Optional: set your preferred location
             "data_format": "html"
         }
@@ -85,14 +84,16 @@ def fetch_linkedin_links(query, api_key, applicant_name):
             "Content-Type": "application/json"
         }
-        response = requests.post(url, json=payload, headers=headers)
         response.raise_for_status()
         html = response.text
         linkedin_regex = r'https://(?:[a-z]{2,3}\.)?linkedin\.com/in/[a-zA-Z0-9\-_/]+'
-        links = re.findall(linkedin_regex, html)
-        for link in links:
             profile_name = get_name_from_url(link)
             if profile_name:
                 similarity = calculate_similarity(applicant_name, profile_name)
@@ -101,8 +102,7 @@ def fetch_linkedin_links(query, api_key, applicant_name):
         return None
     except Exception as e:
         st.error(f"Error fetching link for query '{query}': {e}")
-        return None
 def process_file(file, api_key):
     """Processes the uploaded Excel file to fetch LinkedIn profile links."""

 def fetch_linkedin_links(query, api_key, applicant_name):
+    """Fetches LinkedIn profile links using BrightData SERP scraping API."""
     try:
         url = "https://api.brightdata.com/request"
+        google_url = f"https://www.google.com/search?q={requests.utils.quote(query)}"
         payload = {
+            "url": google_url,
             "method": "GET",
+            "country": "us",  # Optional: your target country
+            "format": "raw",
             "data_format": "html"
         }
             "Content-Type": "application/json"
         }
+        response = requests.post(url, headers=headers, json=payload)
         response.raise_for_status()
         html = response.text
+        # Match standard LinkedIn profile URLs
         linkedin_regex = r'https://(?:[a-z]{2,3}\.)?linkedin\.com/in/[a-zA-Z0-9\-_/]+'
+        matches = re.findall(linkedin_regex, html)
+        for link in matches:
             profile_name = get_name_from_url(link)
             if profile_name:
                 similarity = calculate_similarity(applicant_name, profile_name)
         return None
     except Exception as e:
         st.error(f"Error fetching link for query '{query}': {e}")
+        return None
 def process_file(file, api_key):
     """Processes the uploaded Excel file to fetch LinkedIn profile links."""