@@ -116,7 +116,7 @@ def initialize_search_index(self) -> None:
         # Download the reranking index if one is not on disk already.
         logger.info("Downloading the Reranking Dataset Index File")
         urllib.request.urlretrieve(
-            "http://phontron.com/data/prompt2model/dataset_reranking_index.json",
+            "http://phontron.com/data/prompt2model/reranking_dataset_index.json",
             self.reranking_dataset_info_file,
         )
         with open(self.reranking_dataset_info_file, "r") as f:
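For reference, a minimal standalone sketch of the corrected download path. The os.path.exists guard and the json.load call are assumptions inferred from the "if one is not on disk already" comment and the with open(...) context line; only the URL and the urlretrieve call appear in the hunk itself:

    import json
    import os
    import urllib.request

    RERANKING_INDEX_URL = (
        "http://phontron.com/data/prompt2model/reranking_dataset_index.json"
    )

    def load_reranking_index(index_path: str) -> dict:
        """Download the reranking index to index_path if it is missing, then load it."""
        if not os.path.exists(index_path):  # assumed guard; the hunk only shows the download
            urllib.request.urlretrieve(RERANKING_INDEX_URL, index_path)
        with open(index_path, "r") as f:
            return json.load(f)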
@@ -659,12 +659,14 @@ def get_datasets_of_required_size(
                 prompt_spec,
                 self.total_num_points_to_transform - curr_datasets_size,
             )
-            curr_datasets_size += len(canonicalized_dataset["train"]["input_col"])
-            inputs += canonicalized_dataset["train"]["input_col"]
-            outputs += canonicalized_dataset["train"]["output_col"]
-            dataset_contributions[f"{dataset_name}_{config_name}"] = len(
-                canonicalized_dataset["train"]["input_col"]
-            )
+            if canonicalized_dataset is not None and "train" in canonicalized_dataset:
+
+                curr_datasets_size += len(canonicalized_dataset["train"]["input_col"])
+                inputs += canonicalized_dataset["train"]["input_col"]
+                outputs += canonicalized_dataset["train"]["output_col"]
+                dataset_contributions[f"{dataset_name}_{config_name}"] = len(
+                    canonicalized_dataset["train"]["input_col"]
+                )
 
             if len(datasets_info[dataset_name]["configs"]) == 1:
                 del datasets_info[dataset_name]
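The second hunk's intent, shown as a standalone sketch. The function name and signature here are hypothetical; only the None / "train" guard and the input_col / output_col accumulation come from the diff:

    from typing import Optional

    def accumulate_examples(
        canonicalized_dataset: Optional[dict],
        inputs: list,
        outputs: list,
    ) -> int:
        """Append the dataset's train columns to inputs/outputs; return rows added."""
        # Skip accumulation when canonicalization failed (None) or produced no "train" split.
        if canonicalized_dataset is None or "train" not in canonicalized_dataset:
            return 0  # nothing usable came back; leave the accumulators untouched
        train = canonicalized_dataset["train"]
        inputs += train["input_col"]
        outputs += train["output_col"]
        return len(train["input_col"])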