import re
from html import unescape
# Patterns are compiled once at import time. DOTALL lets cell content span
# several lines; IGNORECASE accepts upper-case tags; ``\b[^>]*`` keeps the
# match confined to the intended tag name (``<tr`` must not match ``<track``).
_ROW_RE = re.compile(r'<tr\b[^>]*>(.*?)</tr>', re.DOTALL | re.IGNORECASE)
_CELL_RE = re.compile(r'<td\b[^>]*>(.*?)</td>', re.DOTALL | re.IGNORECASE)
_PRE_RE = re.compile(r'<pre\b[^>]*>(.*?)</pre>', re.DOTALL | re.IGNORECASE)
_TAG_RE = re.compile(r'<[^>]*>')


def _flip_entries(cell_html, date):
    """Return one ``"<flip> at 🗓 <date>"`` string per non-blank line of the cell's <pre>."""
    match = _PRE_RE.search(cell_html)
    if match is None:
        return []
    # Decode entities such as ``&#10145;`` so the plain-text output shows the
    # real character rather than the HTML escape.
    text = unescape(match.group(1))
    return [
        f"{line.strip()} at 🗓 {date}"
        for line in text.splitlines()
        if line.strip()
    ]


def extract_main_text(html):
    """Build a plain-text "Score Flips" summary from a scanner HTML table.

    Every ``<tr>`` that holds at least three ``<td>`` cells is read as
    ``(timestamp, bear-to-bull, bull-to-bear)``. The flip text is taken from
    the first ``<pre>`` element of the second and third cells; each non-blank
    line inside that ``<pre>`` becomes its own entry, suffixed with the row's
    timestamp. Rows without ``<td>`` cells (for example a ``<th>`` header row)
    contribute nothing, so the header does not have to be the first row.

    Args:
        html: HTML markup containing the table.

    Returns:
        The message text: a fixed title, an optional "Bear to Bull" section,
        an optional "Bull to Bear" section, and a fixed footer. Sections with
        no entries are omitted.

    Note:
        Parsing is regex based, so tables nested inside cells are not
        supported.
    """
    bear_to_bull = []
    bull_to_bear = []

    for row_html in _ROW_RE.findall(html):
        cells = _CELL_RE.findall(row_html)
        if len(cells) < 3:
            # Header rows (<th> only) and malformed rows carry no flip data.
            continue
        # Drop inline markup, decode entities and collapse whitespace so a
        # timestamp wrapped across lines still renders on a single line.
        date = " ".join(unescape(_TAG_RE.sub('', cells[0])).split())
        bear_to_bull.extend(_flip_entries(cells[1], date))
        bull_to_bear.extend(_flip_entries(cells[2], date))

    parts = ["Tradars Market Scanner Update:\n\nScore Flips\n\n"]
    if bear_to_bull:
        parts.append("Bear to Bull\n\n" + "\n".join(bear_to_bull) + "\n\n")
    if bull_to_bear:
        parts.append("Bull to Bear\n\n" + "\n".join(bull_to_bear) + "\n\n")
    parts.append("For more details, visit QuantBox: www.quantbox.co")
    return "".join(parts)
# Example HTML input: a sample "Score Flips" table with one <th> header row
# followed by four data rows. Each data row holds a timestamp cell, a
# "Bear to Bull" cell and a "Bull to Bear" cell; the flip text sits inside a
# <pre> element, and an empty <pre></pre> means no flip for that column.
html_input = """
<div id="m_5706983189435444783email" style="display:flex">
<table border="1" style="border-collapse:collapse">
<tbody>
<tr>
<th> Score Flips </th>
<th> Bear to Bull </th>
<th> Bull to Bear </th>
</tr>
<tr style="background-color:#dddddd;text-align:center">
<td style="padding-left:10px;padding-right:10px">2024-08-27 10:57:59</td>
<td style="padding-left:10px;padding-right:10px"><pre>CHFJPY: -4 ➡ -8</pre></td>
<td style="padding-left:10px;padding-right:10px"><pre></pre></td>
</tr>
<tr style="text-align:center">
<td style="padding-left:10px;padding-right:10px">2024-08-27 13:58:02</td>
<td style="padding-left:10px;padding-right:10px"><pre></pre></td>
<td style="padding-left:10px;padding-right:10px"><pre>EURAUD: -4 ➡ -5</pre></td>
</tr>
<tr style="background-color:#dddddd;text-align:center">
<td style="padding-left:10px;padding-right:10px">2024-08-27 14:58:02</td>
<td style="padding-left:10px;padding-right:10px"><pre>SPX500: 3 ➡ 5</pre></td>
<td style="padding-left:10px;padding-right:10px"><pre></pre></td>
</tr>
<tr style="text-align:center">
<td style="padding-left:10px;padding-right:10px">2024-08-27 20:58:01</td>
<td style="padding-left:10px;padding-right:10px"><pre>EURCAD: 4 ➡ 5</pre></td>
<td style="padding-left:10px;padding-right:10px"><pre></pre></td>
</tr>
</tbody>
</table>
</div>
"""
# Run the extractor on the sample table and print the resulting plain-text
# summary. These statements sit at module level, so they execute whenever
# the file is run or imported.
formatted_output = extract_main_text(html_input)
print(formatted_output)