"""Concatenate every ``.txt`` file in a directory into one training file.

Each source document becomes exactly one line of the output file.
"""

import os

content_dir = "./content"
output_file = "raw_training_data.txt"


def build_training_file(source_dir: str, destination: str) -> None:
    """Write the text of each ``.txt`` file in *source_dir* to *destination*.

    Files are processed in sorted name order so the output is reproducible
    (``os.listdir`` makes no ordering guarantee).  Every document is
    flattened to a single line: surrounding whitespace is stripped, blank
    lines are dropped, and the remaining lines are joined with one space.

    Args:
        source_dir: Directory scanned (non-recursively) for ``.txt`` files.
        destination: Path of the UTF-8 text file to create or overwrite.

    Raises:
        OSError: If *source_dir* cannot be listed or a file cannot be
            read or written.
        UnicodeDecodeError: If a source file is not valid UTF-8.
    """
    # List the directory BEFORE opening the destination: if the source
    # directory is missing we fail without truncating an existing output.
    names = sorted(
        name
        for name in os.listdir(source_dir)
        # isfile() skips directories that merely happen to end in ".txt".
        if name.endswith(".txt")
        and os.path.isfile(os.path.join(source_dir, name))
    )

    with open(destination, "w", encoding="utf-8") as out:
        for name in names:
            path = os.path.join(source_dir, name)
            with open(path, "r", encoding="utf-8") as src:
                text = src.read().strip()
            # Collapse internal line breaks so the "one text per line"
            # format actually holds for multi-line documents.
            pieces = (line.strip() for line in text.splitlines())
            out.write(" ".join(piece for piece in pieces if piece) + "\n")


if __name__ == "__main__":
    build_training_file(content_dir, output_file)