---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[6], line 4
2 cfg = OmegaConf.load('../config/data/image/mnist.yaml')
3 dm = instantiate(cfg)
----> 4 dm.prepare_data()
5 dm.setup()
File ~/Projects/nimrod/nimrod/image/datasets.py:415, in ImageDataModule.prepare_data(self)
412 """Download data if needed
413 """
414 # train set
--> 415 self.train_ds = ImageDataset(
416 self.hparams.name,
417 *self.args,
418 data_dir = self.hparams.data_dir,
419 split='train',
420 transforms = self.hparams.transforms,
421 **self.kwargs
422 )
423 # get num classes before setup method converst ImageDataset to Subset
424 self._num_classes = self.train_ds.num_classes
File ~/Projects/nimrod/nimrod/image/datasets.py:228, in ImageDataset.__init__(self, name, data_dir, split, transforms, streaming, exclude_grey_scale, verification_mode, from_image_folder, from_disk, *args)
226 else:
227 logger.info(f"loading dataset {name} from split {split}")
--> 228 self.hf_ds = load_dataset(
229 name,
230 *args,
231 split=split,
232 cache_dir=data_dir,
233 download_mode='reuse_dataset_if_exists',
234 streaming=streaming,
235 verification_mode=verification_mode
236
237 )
239 # CHANGE IMAGE COLUMN NAME IF NEEDED
240 self.image_column_name = 'image'
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/load.py:2129, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2124 verification_mode = VerificationMode(
2125 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2126 )
2128 # Create a dataset builder
-> 2129 builder_instance = load_dataset_builder(
2130 path=path,
2131 name=name,
2132 data_dir=data_dir,
2133 data_files=data_files,
2134 cache_dir=cache_dir,
2135 features=features,
2136 download_config=download_config,
2137 download_mode=download_mode,
2138 revision=revision,
2139 token=token,
2140 storage_options=storage_options,
2141 trust_remote_code=trust_remote_code,
2142 _require_default_config_name=name is None,
2143 **config_kwargs,
2144 )
2146 # Return iterable dataset in case of streaming
2147 if streaming:
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/load.py:1886, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, storage_options, trust_remote_code, _require_default_config_name, **config_kwargs)
1884 builder_cls = get_dataset_builder_class(dataset_module, dataset_name=dataset_name)
1885 # Instantiate the dataset builder
-> 1886 builder_instance: DatasetBuilder = builder_cls(
1887 cache_dir=cache_dir,
1888 dataset_name=dataset_name,
1889 config_name=config_name,
1890 data_dir=data_dir,
1891 data_files=data_files,
1892 hash=dataset_module.hash,
1893 info=info,
1894 features=features,
1895 token=token,
1896 storage_options=storage_options,
1897 **builder_kwargs,
1898 **config_kwargs,
1899 )
1900 builder_instance._use_legacy_cache_dir_if_possible(dataset_module)
1902 return builder_instance
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/builder.py:342, in DatasetBuilder.__init__(self, cache_dir, dataset_name, config_name, hash, base_path, info, features, token, repo_id, data_files, data_dir, storage_options, writer_batch_size, **config_kwargs)
340 config_kwargs["data_dir"] = data_dir
341 self.config_kwargs = config_kwargs
--> 342 self.config, self.config_id = self._create_builder_config(
343 config_name=config_name,
344 custom_features=features,
345 **config_kwargs,
346 )
348 # prepare info: DatasetInfo are a standardized dataclass across all datasets
349 # Prefill datasetinfo
350 if info is None:
351 # TODO FOR PACKAGED MODULES IT IMPORTS DATA FROM src/packaged_modules which doesn't make sense
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/builder.py:597, in DatasetBuilder._create_builder_config(self, config_name, custom_features, **config_kwargs)
594 raise ValueError(f"BuilderConfig must have a name, got {builder_config.name}")
596 # resolve data files if needed
--> 597 builder_config._resolve_data_files(
598 base_path=self.base_path,
599 download_config=DownloadConfig(token=self.token, storage_options=self.storage_options),
600 )
602 # compute the config id that is going to be used for caching
603 config_id = builder_config.create_config_id(
604 config_kwargs,
605 custom_features=custom_features,
606 )
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/builder.py:206, in BuilderConfig._resolve_data_files(self, base_path, download_config)
204 if isinstance(self.data_files, DataFilesPatternsDict):
205 base_path = xjoin(base_path, self.data_dir) if self.data_dir else base_path
--> 206 self.data_files = self.data_files.resolve(base_path, download_config)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/data_files.py:818, in DataFilesPatternsDict.resolve(self, base_path, download_config)
816 out = DataFilesDict()
817 for key, data_files_patterns_list in self.items():
--> 818 out[key] = data_files_patterns_list.resolve(base_path, download_config)
819 return out
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/data_files.py:771, in DataFilesPatternsList.resolve(self, base_path, download_config)
768 for pattern, allowed_extensions in zip(self, self.allowed_extensions):
769 try:
770 data_files.extend(
--> 771 resolve_pattern(
772 pattern,
773 base_path=base_path,
774 allowed_extensions=allowed_extensions,
775 download_config=download_config,
776 )
777 )
778 except FileNotFoundError:
779 if not has_magic(pattern):
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/datasets/data_files.py:388, in resolve_pattern(pattern, base_path, allowed_extensions, download_config)
383 if protocol == "hf" and config.HF_HUB_VERSION >= version.parse("0.20.0"):
384 # 10 times faster glob with detail=True (ignores costly info like lastCommit)
385 glob_kwargs["expand_info"] = False
386 matched_paths = [
387 filepath if filepath.startswith(protocol_prefix) else protocol_prefix + filepath
--> 388 for filepath, info in fs.glob(pattern, detail=True, **glob_kwargs).items()
389 if info["type"] == "file"
390 and (xbasename(filepath) not in files_to_ignore)
391 and not _is_inside_unrequested_special_dir(filepath, fs_pattern)
392 and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(filepath, fs_pattern)
393 ] # ignore .ipynb and __pycache__, but keep /../
394 if allowed_extensions is not None:
395 out = [
396 filepath
397 for filepath in matched_paths
398 if any("." + suffix in allowed_extensions for suffix in xbasename(filepath).split(".")[1:])
399 ]
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py:521, in HfFileSystem.glob(self, path, **kwargs)
519 kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
520 path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
--> 521 return super().glob(path, **kwargs)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/fsspec/spec.py:611, in AbstractFileSystem.glob(self, path, maxdepth, **kwargs)
608 else:
609 depth = None
--> 611 allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
613 pattern = glob_translate(path + ("/" if ends_with_sep else ""))
614 pattern = re.compile(pattern)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py:556, in HfFileSystem.find(self, path, maxdepth, withdirs, detail, refresh, revision, **kwargs)
533 """
534 List all files below path.
535
(...)
553 `Union[List[str], Dict[str, Dict[str, Any]]]`: List of paths or dict of file information.
554 """
555 if maxdepth:
--> 556 return super().find(
557 path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, refresh=refresh, revision=revision, **kwargs
558 )
559 resolved_path = self.resolve_path(path, revision=revision)
560 path = resolved_path.unresolve()
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/fsspec/spec.py:502, in AbstractFileSystem.find(self, path, maxdepth, withdirs, detail, **kwargs)
499 # Add the root directory if withdirs is requested
500 # This is needed for posix glob compliance
501 if withdirs and path != "" and self.isdir(path):
--> 502 out[path] = self.info(path)
504 for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
505 if withdirs:
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py:719, in HfFileSystem.info(self, path, refresh, revision, **kwargs)
717 out = out1[0]
718 if refresh or out is None or (expand_info and out and out["last_commit"] is None):
--> 719 paths_info = self._api.get_paths_info(
720 resolved_path.repo_id,
721 resolved_path.path_in_repo,
722 expand=expand_info,
723 revision=resolved_path.revision,
724 repo_type=resolved_path.repo_type,
725 )
726 if not paths_info:
727 _raise_file_not_found(path, None)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
111 if check_use_auth_token:
112 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 114 return fn(*args, **kwargs)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/hf_api.py:3295, in HfApi.get_paths_info(self, repo_id, paths, expand, revision, repo_type, token)
3292 revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION
3293 headers = self._build_hf_headers(token=token)
-> 3295 response = get_session().post(
3296 f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}",
3297 data={
3298 "paths": paths if isinstance(paths, list) else [paths],
3299 "expand": expand,
3300 },
3301 headers=headers,
3302 )
3303 hf_raise_for_status(response)
3304 paths_info = response.json()
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:637, in Session.post(self, url, data, json, **kwargs)
626 def post(self, url, data=None, json=None, **kwargs):
627 r"""Sends a POST request. Returns :class:`Response` object.
628
629 :param url: URL for the new :class:`Request` object.
(...)
634 :rtype: requests.Response
635 """
--> 637 return self.request("POST", url, data=data, json=json, **kwargs)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:724, in Session.send(self, request, **kwargs)
721 if allow_redirects:
722 # Redirect resolving generator.
723 gen = self.resolve_redirects(r, request, **kwargs)
--> 724 history = [resp for resp in gen]
725 else:
726 history = []
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:724, in <listcomp>(.0)
721 if allow_redirects:
722 # Redirect resolving generator.
723 gen = self.resolve_redirects(r, request, **kwargs)
--> 724 history = [resp for resp in gen]
725 else:
726 history = []
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:265, in SessionRedirectMixin.resolve_redirects(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)
263 yield req
264 else:
--> 265 resp = self.send(
266 req,
267 stream=stream,
268 timeout=timeout,
269 verify=verify,
270 cert=cert,
271 proxies=proxies,
272 allow_redirects=False,
273 **adapter_kwargs,
274 )
276 extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)
278 # extract redirect url, if any, for the next loop
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:93, in UniqueRequestIdAdapter.send(self, request, *args, **kwargs)
91 """Catch any RequestException to append request id to the error message for debugging."""
92 try:
---> 93 return super().send(request, *args, **kwargs)
94 except requests.RequestException as e:
95 request_id = request.headers.get(X_AMZN_TRACE_ID)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
664 timeout = TimeoutSauce(connect=timeout, read=timeout)
666 try:
--> 667 resp = conn.urlopen(
668 method=request.method,
669 url=url,
670 body=request.body,
671 headers=request.headers,
672 redirect=False,
673 assert_same_host=False,
674 preload_content=False,
675 decode_content=False,
676 retries=self.max_retries,
677 timeout=timeout,
678 chunked=chunked,
679 )
681 except (ProtocolError, OSError) as err:
682 raise ConnectionError(err, request=request)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/urllib3/connectionpool.py:787, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
784 response_conn = conn if not release_conn else None
786 # Make the request on the HTTPConnection object
--> 787 response = self._make_request(
788 conn,
789 method,
790 url,
791 timeout=timeout_obj,
792 body=body,
793 headers=headers,
794 chunked=chunked,
795 retries=retries,
796 response_conn=response_conn,
797 preload_content=preload_content,
798 decode_content=decode_content,
799 **response_kw,
800 )
802 # Everything went great!
803 clean_exit = True
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/urllib3/connectionpool.py:534, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
532 # Receive the response from the server
533 try:
--> 534 response = conn.getresponse()
535 except (BaseSSLError, OSError) as e:
536 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File ~/miniconda3/envs/nimrod/lib/python3.11/site-packages/urllib3/connection.py:516, in HTTPConnection.getresponse(self)
513 _shutdown = getattr(self.sock, "shutdown", None)
515 # Get the response from http.client.HTTPConnection
--> 516 httplib_response = super().getresponse()
518 try:
519 assert_header_parsing(httplib_response.msg)
File ~/miniconda3/envs/nimrod/lib/python3.11/http/client.py:1390, in HTTPConnection.getresponse(self)
1388 try:
1389 try:
-> 1390 response.begin()
1391 except ConnectionError:
1392 self.close()
File ~/miniconda3/envs/nimrod/lib/python3.11/http/client.py:325, in HTTPResponse.begin(self)
323 # read until we get a non-100 response
324 while True:
--> 325 version, status, reason = self._read_status()
326 if status != CONTINUE:
327 break
File ~/miniconda3/envs/nimrod/lib/python3.11/http/client.py:286, in HTTPResponse._read_status(self)
285 def _read_status(self):
--> 286 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
287 if len(line) > _MAXLINE:
288 raise LineTooLong("status line")
File ~/miniconda3/envs/nimrod/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
704 while True:
705 try:
--> 706 return self._sock.recv_into(b)
707 except timeout:
708 self._timeout_occurred = True
File ~/miniconda3/envs/nimrod/lib/python3.11/ssl.py:1314, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1310 if flags != 0:
1311 raise ValueError(
1312 "non-zero flags not allowed in calls to recv_into() on %s" %
1313 self.__class__)
-> 1314 return self.read(nbytes, buffer)
1315 else:
1316 return super().recv_into(buffer, nbytes, flags)
File ~/miniconda3/envs/nimrod/lib/python3.11/ssl.py:1166, in SSLSocket.read(self, len, buffer)
1164 try:
1165 if buffer is not None:
-> 1166 return self._sslobj.read(len, buffer)
1167 else:
1168 return self._sslobj.read(len)
KeyboardInterrupt: