Add camo's found sequences (letter A), make STATS.md update automatically

GameRoMan · GameRoMan · commit 451529fbc3ea · 2025-03-24T15:57:17.000Z
diff --git a/STATS.md b/STATS.md
@@ -2,20 +2,20 @@
 
 | Letter | Found    | Percentage | Letter | Found    | Percentage |
 |--------|----------|------------|--------|----------|------------|
-| A      | 210,308  | 46.02%     | N      | 164,718  | 36.05%     |
-| B      | 275,323  | 60.25%     | O      | 92,892   | 20.33%     |
-| C      | 266,286  | 58.27%     | P      | 138,575  | 30.32%     |
-| D      | 285,758  | 62.53%     | Q      | 135,488  | 29.65%     |
-| E      | 164,492  | 36.00%     | R      | 112,082  | 24.53%     |
-| F      | 282,592  | 61.84%     | S      | 69,426   | 15.19%     |
-| G      | 290,477  | 63.57%     | T      | 66,409   | 14.53%     |
-| H      | 169,906  | 37.18%     | U      | 28,574   | 6.25%      |
-| I      | 95,294   | 20.85%     | V      | 95,877   | 20.98%     |
-| J      | 268,028  | 58.65%     | W      | 44,491   | 9.74%      |
-| K      | 199,774  | 43.72%     | X      | 12,495   | 2.73%      |
-| L      | 182,013  | 39.83%     | Y      | 62,226   | 13.62%     |
-| M      | 209,299  | 45.80%     | Z      | 125,476  | 27.46%     |
+| A      | 217,914  |     47.69% | N      | 164,718  |     36.05% |
+| B      | 275,323  |     60.25% | O      | 92,892   |     20.33% |
+| C      | 266,286  |     58.27% | P      | 138,575  |     30.32% |
+| D      | 285,758  |     62.53% | Q      | 135,488  |     29.65% |
+| E      | 164,492  |     36.00% | R      | 112,082  |     24.53% |
+| F      | 282,592  |     61.84% | S      | 69,426   |     15.19% |
+| G      | 290,477  |     63.57% | T      | 66,409   |     14.53% |
+| H      | 169,906  |     37.18% | U      | 28,574   |      6.25% |
+| I      | 95,294   |     20.85% | V      | 95,877   |     20.98% |
+| J      | 268,028  |     58.65% | W      | 44,491   |      9.74% |
+| K      | 199,774  |     43.72% | X      | 12,495   |      2.73% |
+| L      | 182,013  |     39.83% | Y      | 62,226   |     13.62% |
+| M      | 209,299  |     45.80% | Z      | 125,476  |     27.46% |
 
 | Total Sequences Found | Percentage |
 |-----------------------|------------|
-|   4,048,279             | 34.07%     |
+| 4,055,885             |     34.14% |
diff --git a/__main__.py b/__main__.py
@@ -10,6 +10,7 @@
 FOUND_FILES_PATH = os.path.join(BASE_DIR, 'found-files')
 LOG_FILE_PATH = os.path.join(BASE_DIR, 'console.log')
 ENV_FILE_PATH = os.path.join(BASE_DIR, '.env')
+STATS_FILE_PATH = os.path.join(BASE_DIR, 'STATS.md')
 
 
 logging.basicConfig(
@@ -23,36 +24,54 @@
 )
 
 
-def split_array(array: list, chunk_size: int):
-	return [array[i:i + chunk_size] for i in range(0, len(array), chunk_size)]
-
-
-def copy_for_reviving():
-	not_found = get_not_found_sequences('a', 'a')
-	chunks = split_array(not_found, 64)
-	data = '\n\n'.join(map(lambda x: f'await revive(`{'\\n'.join(x)}`);', chunks))
-	copy(data)
-
-
 def get_all_stats():
+	# Initialize variables
+	letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+	stats = {}
 	total_overall, found_overall = 0, 0
 
-	for letter in 'abcdefghijklmnopqrstuvwxyz':
+	# Step 1: Collect statistics for all letters
+	for letter in letters:
 		total, found, percentage = get_statistics(letter)
-		logging.info(f'Letter {letter}: {found:<7,} / {total:,} sequences found ({percentage:.2f}%)')
-
+		stats[letter] = (total, found, percentage)
 		total_overall += total
 		found_overall += found
+		logging.info(f'Letter {letter.upper()}: {found:<7,} / {total:,} sequences found ({percentage:.2f}%)')
 
+	# Calculate overall percentage
 	overall_percentage = (found_overall / total_overall * 100) if total_overall else 0
 	logging.info('\n--- Overall Statistics ---')
 	logging.info(f'Total: {found_overall:,} / {total_overall:,} sequences found ({overall_percentage:.2f}%)\n\n')
 
+	# Step 2: Construct the table
+	stats_lines = [
+		"# Stats\n\n",
+		"| Letter | Found    | Percentage | Letter | Found    | Percentage |\n",
+		"|--------|----------|------------|--------|----------|------------|\n"
+	]
+
+	# Pair letters: A with N, B with O, ..., M with Z
+	for k in range(13):  # 0 to 12 covers all 13 pairs
+		letter1 = letters[k]        # A to M (positions 0 to 12)
+		letter2 = letters[k + 13]   # N to Z (positions 13 to 25)
+		found1, percentage1 = stats[letter1][1], stats[letter1][2]
+		found2, percentage2 = stats[letter2][1], stats[letter2][2]
+		row = f"| {letter1.upper():<6} | {found1:<8,} | {percentage1:>9.2f}% | {letter2.upper():<6} | {found2:<8,} | {percentage2:>9.2f}% |\n"
+		stats_lines.append(row)
+
+	# Add overall statistics
+	stats_lines.append("\n| Total Sequences Found | Percentage |\n")
+	stats_lines.append("|-----------------------|------------|\n")
+	stats_lines.append(f"| {found_overall:<21,} | {overall_percentage:>9.2f}% |\n")
+
+	# Step 3: Write to STATS.md
+	with open(STATS_FILE_PATH, 'w') as stats_file:
+		stats_file.writelines(stats_lines)
+
 
 def process_everything():
-	sub_directory = 'other'
-	for file_name in os.listdir(os.path.join(FOUND_FILES_PATH, sub_directory)):
-		file_path = os.path.join(FOUND_FILES_PATH, sub_directory, file_name)
+	for file_name in os.listdir(FOUND_FILES_PATH):
+		file_path = os.path.join(FOUND_FILES_PATH, file_name)
 		process_single_file(file_path)
 
 
diff --git a/database.py b/database.py
@@ -60,12 +60,12 @@ def get_not_found_sequences(letter: str, second_letter: str) -> list[str]:
 
 
 def get_statistics(letter: str):
-	with connect_db(letter) as conn:
+	with connect_db(letter.lower()) as conn:
 		cursor = conn.cursor()
 		total, found = 0, 0
 
 		for second_letter in 'abcdefghijklmnopqrstuvwxyz':
-			table_name = f'"{letter}{second_letter}"'
+			table_name = f'"{letter.lower()}{second_letter}"'
 			total_query = f'SELECT COUNT(*) FROM {table_name}'
 			found_query = f'SELECT COUNT(*) FROM {table_name} WHERE found = 1'
 
diff --git a/databases/sequences/a.db b/databases/sequences/a.db
diff --git a/processing.py b/processing.py
@@ -31,17 +31,6 @@ def process_letter(letter: str, sequences: list[str]):
 	logging.info(f'--- Completed letter {letter} in {time() - start_time:.2f} seconds ---\n')
 
 
-def process_directory_by_letter(subdirectory_name: str):
-	"""Process directory where files are already separated by letters."""
-	logging.info(f'\n--- Processing directory {subdirectory_name} where files are separated by letter ---')
-
-	for letter in 'abcdefghijklmnopqrstuvwxyz':
-		file_rel_path = subdirectory_name, f'{letter}.txt'
-		sequences = get_sequences_from_file(file_rel_path)
-		if sequences:
-			process_letter(letter, sequences)
-
-
 def process_single_file(file_path: str):
 	"""Process a single file, splitting sequences by letter and then processing them."""
 	file_name = os.path.basename(file_path)