@@ -164,35 +164,35 @@ files. Each file in the directory represents a different year of the entire data
 .. ipython:: python
    :okwarning:

-   import pathlib
+   import glob
+   import tempfile

    N = 12
    starts = [f"20{i:>02d}-01-01" for i in range(N)]
    ends = [f"20{i:>02d}-12-13" for i in range(N)]

-   pathlib.Path("data/timeseries").mkdir(exist_ok=True)
+   tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)

    for i, (start, end) in enumerate(zip(starts, ends)):
        ts = make_timeseries(start=start, end=end, freq="1min", seed=i)
-       ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet")
+       ts.to_parquet(f"{tmpdir.name}/ts-{i:0>2d}.parquet")


 ::

-   data
-   └── timeseries
-       ├── ts-00.parquet
-       ├── ts-01.parquet
-       ├── ts-02.parquet
-       ├── ts-03.parquet
-       ├── ts-04.parquet
-       ├── ts-05.parquet
-       ├── ts-06.parquet
-       ├── ts-07.parquet
-       ├── ts-08.parquet
-       ├── ts-09.parquet
-       ├── ts-10.parquet
-       └── ts-11.parquet
+   tmpdir
+   ├── ts-00.parquet
+   ├── ts-01.parquet
+   ├── ts-02.parquet
+   ├── ts-03.parquet
+   ├── ts-04.parquet
+   ├── ts-05.parquet
+   ├── ts-06.parquet
+   ├── ts-07.parquet
+   ├── ts-08.parquet
+   ├── ts-09.parquet
+   ├── ts-10.parquet
+   └── ts-11.parquet

 Now we'll implement an out-of-core :meth:`pandas.Series.value_counts`. The peak memory usage of this
 workflow is the single largest chunk, plus a small series storing the unique value
@@ -202,13 +202,18 @@ work for arbitrary-sized datasets.
 .. ipython:: python

    %%time
-   files = pathlib.Path("data/timeseries/").glob("ts*.parquet")
+   files = glob.iglob(f"{tmpdir.name}/ts*.parquet")
    counts = pd.Series(dtype=int)
    for path in files:
        df = pd.read_parquet(path)
        counts = counts.add(df["name"].value_counts(), fill_value=0)
    counts.astype(int)

+.. ipython:: python
+   :suppress:
+
+   tmpdir.cleanup()
+
 Some readers, like :meth:`pandas.read_csv`, offer parameters to control the
 ``chunksize`` when reading a single file.

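As a minimal sketch of that chunked pattern (the CSV path and chunk size here
are illustrative, not part of this change)::

    import pandas as pd

    counts = pd.Series(dtype=int)
    # Passing chunksize makes read_csv return an iterator of DataFrame
    # chunks instead of loading one large DataFrame into memory.
    for chunk in pd.read_csv("data/timeseries.csv", chunksize=100_000):
        counts = counts.add(chunk["name"].value_counts(), fill_value=0)
    counts.astype(int)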
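One note on the design choice: switching from a persistent ``data/timeseries``
directory to :class:`tempfile.TemporaryDirectory` means the example no longer
leaves files behind, but the directory only exists until ``cleanup()`` is
called (hence the new ``:suppress:`` block above), and ``ignore_cleanup_errors``
requires Python 3.10+. A minimal sketch of the lifecycle this relies on,
separate from the diff itself::

    import tempfile

    tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    print(tmpdir.name)  # path to the temporary directory on disk
    tmpdir.cleanup()    # remove the directory and all of its contents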