-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
110 lines (92 loc) · 4.31 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Main entry point for the Crunchbase scraper"""
import os
from typing import List
from dotenv import load_dotenv
from src.scraper import CrunchbaseScraper
def read_company_list(filename: str = "company_list.txt") -> List[str]:
    """Read company names from *filename*, one per line.

    Blank lines and surrounding whitespace are discarded.

    Args:
        filename: Path to the newline-delimited company list.

    Returns:
        The list of company names; empty if the file is missing or unreadable.
    """
    try:
        if not os.path.exists(filename):
            # Bug fix: the message previously printed a literal placeholder
            # instead of interpolating the actual filename.
            print(f"Company list file '{filename}' not found.")
            return []
        with open(filename, 'r', encoding='utf-8') as f:
            # Read lines and strip whitespace, skipping empty lines.
            companies = [line.strip() for line in f if line.strip()]
        print(f"Loaded {len(companies)} companies from {filename}")
        return companies
    except Exception as e:
        # Best-effort: log and return an empty list rather than crash the CLI.
        print(f"Error reading company list: {e}")
        return []
def _scrape_and_record(scraper, company_name: str, companies: list) -> None:
    """Search for *company_name* via *scraper*, scrape it, append the result.

    Shared by the interactive and batch modes (the logic was previously
    duplicated in both). Prints progress; leaves *companies* unchanged on
    any failure.
    """
    if not scraper.search_company(company_name):
        print("Failed to search/open company")
        return
    print("Successfully opened company page")
    # Scrape company data
    company_data = scraper.get_company_data()
    if company_data:
        companies.append(company_data)
    else:
        print("Failed to scrape company data")


def main():
    """CLI entry point: authenticate against Crunchbase and scrape companies.

    Credentials come from CRUNCHBASE_EMAIL / CRUNCHBASE_PASSWORD (loaded
    from a .env file if present). Offers an interactive one-at-a-time mode
    and a batch mode driven by company_list.txt.
    """
    # Load environment variables
    load_dotenv()
    email = os.getenv('CRUNCHBASE_EMAIL')
    password = os.getenv('CRUNCHBASE_PASSWORD')
    if not email or not password:
        print("Please set CRUNCHBASE_EMAIL and CRUNCHBASE_PASSWORD in .env file")
        return
    scraper = CrunchbaseScraper(email=email, password=password, headless=False)
    companies = []
    try:
        print("\n========= Crunchbase Scraper Activating... =========")
        print("Made with ❤️ by afk-procrastinator")
        if scraper.access_homepage():
            print("\nReady to start searching!")
            # Ask user for search mode
            print("\nSearch modes:")
            print("1. Search individual companies")
            print("2. Process companies from company_list.txt")
            while True:
                mode = input("\nSelect mode (1 or 2): ").strip()
                if mode in ('1', '2'):
                    break
                print("Invalid choice. Please enter 1 or 2.")
            if mode == '1':
                # Individual company search mode: loop until the user quits.
                while True:
                    company_name = input("\nEnter company name to search (or 'quit' to exit): ").strip()
                    if company_name.lower() == 'quit':
                        break
                    print(f"\nSearching for '{company_name}'...")
                    _scrape_and_record(scraper, company_name, companies)
            else:
                # Batch processing mode driven by company_list.txt.
                company_list = read_company_list()
                if company_list:
                    total = len(company_list)
                    for i, company_name in enumerate(company_list, 1):
                        print(f"\nProcessing company {i}/{total}: '{company_name}'")
                        _scrape_and_record(scraper, company_name, companies)
                        # Save progress after each company so a mid-run
                        # failure loses as little work as possible.
                        if companies:
                            scraper.save_to_csv(companies, "companies_progress.csv")
                else:
                    print("No companies to process. Please check company_list.txt")
        else:
            print("Failed to access homepage")
    finally:
        # Always offer a final save and always release the scraper session.
        if companies:
            save = input("\nWould you like to save the final data to CSV? (y/n): ").strip().lower()
            if save == 'y':
                scraper.save_to_csv(companies)
        scraper.close()
# Run the scraper CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()