MarcRyan committed on
Commit
49e2901
·
verified ·
1 Parent(s): 6eb6a36

Add 3 files

Browse files
Files changed (3) hide show
  1. README.md +7 -5
  2. index.html +223 -19
  3. prompts.txt +2 -0
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Web Crawler
3
- emoji: 😻
4
- colorFrom: red
5
- colorTo: blue
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: web-crawler
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: yellow
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,223 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Real Web Crawler Interface</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ .loading-spinner {
11
+ border: 4px solid rgba(0, 0, 0, 0.1);
12
+ border-radius: 50%;
13
+ border-top: 4px solid #3b82f6;
14
+ width: 30px;
15
+ height: 30px;
16
+ animation: spin 1s linear infinite;
17
+ margin: 0 auto;
18
+ }
19
+
20
+ @keyframes spin {
21
+ 0% { transform: rotate(0deg); }
22
+ 100% { transform: rotate(360deg); }
23
+ }
24
+ </style>
25
+ </head>
26
+ <body class="bg-gray-50 min-h-screen">
27
+ <div class="container mx-auto px-4 py-8">
28
+ <header class="text-center mb-12">
29
+ <h1 class="text-4xl font-bold text-blue-600 mb-2">
30
+ <i class="fas fa-spider mr-2"></i> Real Web Crawler
31
+ </h1>
32
+ <p class="text-gray-600 max-w-2xl mx-auto">
33
+ Enter a zip code and keywords to search for business contact information.
34
+ <br><span class="text-sm text-red-500">Note: Requires backend API implementation</span>
35
+ </p>
36
+ </header>
37
+
38
+ <div class="bg-white rounded-xl shadow-lg p-6 max-w-4xl mx-auto mb-12">
39
+ <form id="crawlerForm" class="space-y-6">
40
+ <div>
41
+ <label for="zipCode" class="block text-sm font-medium text-gray-700 mb-1">
42
+ <i class="fas fa-map-marker-alt mr-1 text-blue-500"></i> Zip Code
43
+ </label>
44
+ <input type="text" id="zipCode" name="zipCode"
45
+ class="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 transition"
46
+ placeholder="e.g. 90210" required>
47
+ </div>
48
+
49
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-4">
50
+ <div>
51
+ <label for="keyword1" class="block text-sm font-medium text-gray-700 mb-1">
52
+ <i class="fas fa-key mr-1 text-blue-500"></i> Keyword 1
53
+ </label>
54
+ <input type="text" id="keyword1" name="keyword1"
55
+ class="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 transition"
56
+ placeholder="e.g. dentist" required>
57
+ </div>
58
+ <div>
59
+ <label for="keyword2" class="block text-sm font-medium text-gray-700 mb-1">
60
+ <i class="fas fa-key mr-1 text-blue-500"></i> Keyword 2
61
+ </label>
62
+ <input type="text" id="keyword2" name="keyword2"
63
+ class="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 transition"
64
+ placeholder="e.g. doctor">
65
+ </div>
66
+ <div>
67
+ <label for="keyword3" class="block text-sm font-medium text-gray-700 mb-1">
68
+ <i class="fas fa-key mr-1 text-blue-500"></i> Keyword 3
69
+ </label>
70
+ <input type="text" id="keyword3" name="keyword3"
71
+ class="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 transition"
72
+ placeholder="e.g. lawyer">
73
+ </div>
74
+ </div>
75
+
76
+ <div class="flex items-center">
77
+ <input type="checkbox" id="deepSearch" name="deepSearch" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded">
78
+ <label for="deepSearch" class="ml-2 block text-sm text-gray-700">
79
+ Deep Search (crawl more pages, takes longer)
80
+ </label>
81
+ </div>
82
+
83
+ <button type="submit" id="searchButton" class="w-full bg-blue-600 hover:bg-blue-700 text-white font-medium py-3 px-4 rounded-lg transition flex items-center justify-center">
84
+ <i class="fas fa-search mr-2"></i> Start Crawling
85
+ </button>
86
+ </form>
87
+ </div>
88
+
89
+ <div id="loadingIndicator" class="hidden text-center py-8">
90
+ <div class="loading-spinner mb-4"></div>
91
+ <p class="text-gray-600">Crawling websites. This may take a few minutes...</p>
92
+ <p class="text-sm text-gray-500 mt-2">Currently processing: <span id="currentUrl" class="font-medium">example.com</span></p>
93
+ </div>
94
+
95
+ <div id="resultsContainer" class="hidden">
96
+ <div class="flex justify-between items-center mb-6">
97
+ <h2 class="text-2xl font-bold text-gray-800">
98
+ <i class="fas fa-list-ul mr-2 text-blue-500"></i> Crawl Results
99
+ </h2>
100
+ <div class="flex items-center space-x-4">
101
+ <div class="bg-blue-100 text-blue-800 px-3 py-1 rounded-full text-sm font-medium">
102
+ Found: <span id="resultCount">0</span> items
103
+ </div>
104
+ <button id="exportBtn" class="bg-green-100 text-green-800 px-3 py-1 rounded-full text-sm font-medium hover:bg-green-200 transition flex items-center">
105
+ <i class="fas fa-file-export mr-1"></i> Export CSV
106
+ </button>
107
+ </div>
108
+ </div>
109
+
110
+ <div id="resultsList" class="space-y-4">
111
+ <!-- Results will be inserted here by JavaScript -->
112
+ </div>
113
+ </div>
114
+
115
+ <div id="noResults" class="hidden text-center py-12">
116
+ <div class="mx-auto w-24 h-24 bg-gray-100 rounded-full flex items-center justify-center mb-4">
117
+ <i class="fas fa-exclamation-triangle text-3xl text-yellow-500"></i>
118
+ </div>
119
+ <h3 class="text-xl font-medium text-gray-700 mb-2">No Results Found</h3>
120
+ <p class="text-gray-500 max-w-md mx-auto">
121
+ We couldn't find any matching results for your search criteria.
122
+ </p>
123
+ </div>
124
+ </div>
125
+
126
+ <script>
127
+ document.addEventListener('DOMContentLoaded', function() {
128
+ const crawlerForm = document.getElementById('crawlerForm');
129
+ const loadingIndicator = document.getElementById('loadingIndicator');
130
+ const resultsContainer = document.getElementById('resultsContainer');
131
+ const noResults = document.getElementById('noResults');
132
+ const resultsList = document.getElementById('resultsList');
133
+ const resultCount = document.getElementById('resultCount');
134
+ const currentUrl = document.getElementById('currentUrl');
135
+
136
+ crawlerForm.addEventListener('submit', async function(e) {
137
+ e.preventDefault();
138
+
139
+ const zipCode = document.getElementById('zipCode').value.trim();
140
+ const keyword1 = document.getElementById('keyword1').value.trim();
141
+ const keyword2 = document.getElementById('keyword2').value.trim();
142
+ const keyword3 = document.getElementById('keyword3').value.trim();
143
+ const deepSearch = document.getElementById('deepSearch').checked;
144
+
145
+ const keywords = [keyword1, keyword2, keyword3].filter(k => k);
146
+
147
+ if (!zipCode) {
148
+ alert('Please enter a zip code');
149
+ return;
150
+ }
151
+
152
+ if (keywords.length === 0) {
153
+ alert('Please enter at least one keyword');
154
+ return;
155
+ }
156
+
157
+ // Show loading state
158
+ loadingIndicator.classList.remove('hidden');
159
+ resultsContainer.classList.add('hidden');
160
+ noResults.classList.add('hidden');
161
+
162
+ try {
163
+ // In a real implementation, this would call your backend API
164
+ // const response = await fetch('/api/crawl', {
165
+ // method: 'POST',
166
+ // headers: {
167
+ // 'Content-Type': 'application/json',
168
+ // },
169
+ // body: JSON.stringify({
170
+ // zipCode,
171
+ // keywords,
172
+ // deepSearch
173
+ // })
174
+ // });
175
+ // const data = await response.json();
176
+
177
+ // For demonstration, we'll simulate an API delay
178
+ await new Promise(resolve => setTimeout(resolve, 3000));
179
+
180
+ // This is where you would process real data from your backend
181
+ // displayResults(data.results);
182
+
183
+ // For now, show a message about backend implementation
184
+ resultsList.innerHTML = `
185
+ <div class="bg-white p-6 rounded-lg shadow">
186
+ <h3 class="text-lg font-medium text-gray-800 mb-2">
187
+ <i class="fas fa-code mr-2 text-blue-500"></i> Backend Implementation Required
188
+ </h3>
189
+ <p class="text-gray-600">
190
+ This interface is ready to connect to a backend API that would perform the actual web crawling.
191
+ To implement this, you would need to:
192
+ </p>
193
+ <ul class="list-disc pl-5 mt-2 space-y-1 text-gray-600">
194
+ <li>Set up a server with Node.js, Python, or another backend language</li>
195
+ <li>Implement proper web crawling logic respecting robots.txt</li>
196
+ <li>Add rate limiting to avoid being blocked</li>
197
+ <li>Implement data parsing for emails, phone numbers, etc.</li>
198
+ <li>Set up the API endpoint this frontend would call</li>
199
+ </ul>
200
+ <div class="mt-4 p-4 bg-blue-50 rounded border border-blue-200">
201
+ <p class="text-blue-800">
202
+ <i class="fas fa-info-circle mr-2"></i>
203
+ The search parameters received were:<br>
204
+ Zip Code: ${zipCode}<br>
205
+ Keywords: ${keywords.join(', ')}<br>
206
+ Deep Search: ${deepSearch ? 'Yes' : 'No'}
207
+ </p>
208
+ </div>
209
+ </div>
210
+ `;
211
+
212
+ resultsContainer.classList.remove('hidden');
213
+ loadingIndicator.classList.add('hidden');
214
+ } catch (error) {
215
+ console.error('Error:', error);
216
+ alert('An error occurred. Check console for details.');
217
+ loadingIndicator.classList.add('hidden');
218
+ }
219
+ });
220
+ });
221
+ </script>
222
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=MarcRyan/web-crawler" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
223
+ </html>
prompts.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ lets build a webcrawler where i input a zip code and array of 3 keywords inputs and it outputs a list of all the emails and names and phone numbers and weblinks that are discovered during the crawl
2
+ ENSURE THE RESULTS ARE REAL LIFE AND TRUE FUNCTIONING DATA THAT IS REAL LIFE DATA AND NOT SIMULATED