Skip to content

Commit 451529f

Browse files
committed
Add camo's stuff, make STATS update automatically
1 parent 48546d0 commit 451529f

File tree

5 files changed

+52
-44
lines changed

5 files changed

+52
-44
lines changed

STATS.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22

33
| Letter | Found | Percentage | Letter | Found | Percentage |
44
|--------|----------|------------|--------|----------|------------|
5-
| A | 210,308 | 46.02% | N | 164,718 | 36.05% |
6-
| B | 275,323 | 60.25% | O | 92,892 | 20.33% |
7-
| C | 266,286 | 58.27% | P | 138,575 | 30.32% |
8-
| D | 285,758 | 62.53% | Q | 135,488 | 29.65% |
9-
| E | 164,492 | 36.00% | R | 112,082 | 24.53% |
10-
| F | 282,592 | 61.84% | S | 69,426 | 15.19% |
11-
| G | 290,477 | 63.57% | T | 66,409 | 14.53% |
12-
| H | 169,906 | 37.18% | U | 28,574 | 6.25% |
13-
| I | 95,294 | 20.85% | V | 95,877 | 20.98% |
14-
| J | 268,028 | 58.65% | W | 44,491 | 9.74% |
15-
| K | 199,774 | 43.72% | X | 12,495 | 2.73% |
16-
| L | 182,013 | 39.83% | Y | 62,226 | 13.62% |
17-
| M | 209,299 | 45.80% | Z | 125,476 | 27.46% |
5+
| A | 217,914 | 47.69% | N | 164,718 | 36.05% |
6+
| B | 275,323 | 60.25% | O | 92,892 | 20.33% |
7+
| C | 266,286 | 58.27% | P | 138,575 | 30.32% |
8+
| D | 285,758 | 62.53% | Q | 135,488 | 29.65% |
9+
| E | 164,492 | 36.00% | R | 112,082 | 24.53% |
10+
| F | 282,592 | 61.84% | S | 69,426 | 15.19% |
11+
| G | 290,477 | 63.57% | T | 66,409 | 14.53% |
12+
| H | 169,906 | 37.18% | U | 28,574 | 6.25% |
13+
| I | 95,294 | 20.85% | V | 95,877 | 20.98% |
14+
| J | 268,028 | 58.65% | W | 44,491 | 9.74% |
15+
| K | 199,774 | 43.72% | X | 12,495 | 2.73% |
16+
| L | 182,013 | 39.83% | Y | 62,226 | 13.62% |
17+
| M | 209,299 | 45.80% | Z | 125,476 | 27.46% |
1818

1919
| Total Sequences Found | Percentage |
2020
|-----------------------|------------|
21-
| 4,048,279 | 34.07% |
21+
| 4,055,885 | 34.14% |

__main__.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
FOUND_FILES_PATH = os.path.join(BASE_DIR, 'found-files')
1111
LOG_FILE_PATH = os.path.join(BASE_DIR, 'console.log')
1212
ENV_FILE_PATH = os.path.join(BASE_DIR, '.env')
13+
STATS_FILE_PATH = os.path.join(BASE_DIR, 'STATS.md')
1314

1415

1516
logging.basicConfig(
@@ -23,36 +24,54 @@
2324
)
2425

2526

26-
def split_array(array: list, chunk_size: int):
27-
return [array[i:i + chunk_size] for i in range(0, len(array), chunk_size)]
28-
29-
30-
def copy_for_reviving():
31-
not_found = get_not_found_sequences('a', 'a')
32-
chunks = split_array(not_found, 64)
33-
data = '\n\n'.join(map(lambda x: f'await revive(`{'\\n'.join(x)}`);', chunks))
34-
copy(data)
35-
36-
3727
def get_all_stats():
28+
# Initialize variables
29+
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
30+
stats = {}
3831
total_overall, found_overall = 0, 0
3932

40-
for letter in 'abcdefghijklmnopqrstuvwxyz':
33+
# Step 1: Collect statistics for all letters
34+
for letter in letters:
4135
total, found, percentage = get_statistics(letter)
42-
logging.info(f'Letter {letter}: {found:<7,} / {total:,} sequences found ({percentage:.2f}%)')
43-
36+
stats[letter] = (total, found, percentage)
4437
total_overall += total
4538
found_overall += found
39+
logging.info(f'Letter {letter.upper()}: {found:<7,} / {total:,} sequences found ({percentage:.2f}%)')
4640

41+
# Calculate overall percentage
4742
overall_percentage = (found_overall / total_overall * 100) if total_overall else 0
4843
logging.info('\n--- Overall Statistics ---')
4944
logging.info(f'Total: {found_overall:,} / {total_overall:,} sequences found ({overall_percentage:.2f}%)\n\n')
5045

46+
# Step 2: Construct the table
47+
stats_lines = [
48+
"# Stats\n\n",
49+
"| Letter | Found | Percentage | Letter | Found | Percentage |\n",
50+
"|--------|----------|------------|--------|----------|------------|\n"
51+
]
52+
53+
# Pair letters: A with N, B with O, ..., M with Z
54+
for k in range(13): # 0 to 12 covers all 13 pairs
55+
letter1 = letters[k] # A to M (positions 0 to 12)
56+
letter2 = letters[k + 13] # N to Z (positions 13 to 25)
57+
found1, percentage1 = stats[letter1][1], stats[letter1][2]
58+
found2, percentage2 = stats[letter2][1], stats[letter2][2]
59+
row = f"| {letter1.upper():<6} | {found1:<8,} | {percentage1:>9.2f}% | {letter2.upper():<6} | {found2:<8,} | {percentage2:>9.2f}% |\n"
60+
stats_lines.append(row)
61+
62+
# Add overall statistics
63+
stats_lines.append("\n| Total Sequences Found | Percentage |\n")
64+
stats_lines.append("|-----------------------|------------|\n")
65+
stats_lines.append(f"| {found_overall:<21,} | {overall_percentage:>9.2f}% |\n")
66+
67+
# Step 3: Write to STATS.md
68+
with open(STATS_FILE_PATH, 'w') as stats_file:
69+
stats_file.writelines(stats_lines)
70+
5171

5272
def process_everything():
53-
sub_directory = 'other'
54-
for file_name in os.listdir(os.path.join(FOUND_FILES_PATH, sub_directory)):
55-
file_path = os.path.join(FOUND_FILES_PATH, sub_directory, file_name)
73+
for file_name in os.listdir(FOUND_FILES_PATH):
74+
file_path = os.path.join(FOUND_FILES_PATH, file_name)
5675
process_single_file(file_path)
5776

5877

database.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,12 @@ def get_not_found_sequences(letter: str, second_letter: str) -> list[str]:
6060

6161

6262
def get_statistics(letter: str):
63-
with connect_db(letter) as conn:
63+
with connect_db(letter.lower()) as conn:
6464
cursor = conn.cursor()
6565
total, found = 0, 0
6666

6767
for second_letter in 'abcdefghijklmnopqrstuvwxyz':
68-
table_name = f'"{letter}{second_letter}"'
68+
table_name = f'"{letter.lower()}{second_letter}"'
6969
total_query = f'SELECT COUNT(*) FROM {table_name}'
7070
found_query = f'SELECT COUNT(*) FROM {table_name} WHERE found = 1'
7171

databases/sequences/a.db

0 Bytes
Binary file not shown.

processing.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,6 @@ def process_letter(letter: str, sequences: list[str]):
3131
logging.info(f'--- Completed letter {letter} in {time() - start_time:.2f} seconds ---\n')
3232

3333

34-
def process_directory_by_letter(subdirectory_name: str):
35-
"""Process directory where files are already separated by letters."""
36-
logging.info(f'\n--- Processing directory {subdirectory_name} where files are separated by letter ---')
37-
38-
for letter in 'abcdefghijklmnopqrstuvwxyz':
39-
file_rel_path = subdirectory_name, f'{letter}.txt'
40-
sequences = get_sequences_from_file(file_rel_path)
41-
if sequences:
42-
process_letter(letter, sequences)
43-
44-
4534
def process_single_file(file_path: str):
4635
"""Process a single file, splitting sequences by letter and then processing them."""
4736
file_name = os.path.basename(file_path)

0 commit comments

Comments
 (0)