import json
import os
from urllib import request

import geojson
import pandas as pd
from django.core.management.base import BaseCommand
from geopy.distance import distance

from pflaenzli_django.settings import BASE_DIR


class Command(BaseCommand):
    """Management command that builds a zip-code -> coordinates lookup table.

    The command downloads a GeoJSON export of the ``plz_verzeichnis`` dataset
    (unless a local copy already exists), extracts one ``(lat, lon)`` pair per
    zip code and stores the result as a pickled pandas DataFrame at
    ``<BASE_DIR>/pflaenzli/utils/plz.pkl``.
    """

    help = 'Get the zip code index from post and compile it to a dataframe pickle'

    def handle(self, *args, **options):
        """Download the dataset (if needed) and compile it into the pickle."""
        self.parse_data(self.download_geojson(api='v2', data='v2'))
        self.stdout.write('Done!')

    def download_geojson(self, api, data):
        """Fetch the GeoJSON export and return the local file name.

        Args:
            api: API version segment inserted into the download URL.
            data: Dataset version suffix, used both in the URL and in the
                local file name ``plz_verzeichnis_<data>.json``.

        Returns:
            The (relative) path of the local GeoJSON file. The path is
            relative to the current working directory.
        """
        file = f'plz_verzeichnis_{data}.json'
        if os.path.exists(file):
            self.stdout.write('File already downloaded. Skipping...\n')
        else:
            self.stdout.write('Downloading geojson...')
            # NOTE(review): this URL has no scheme or host, so urlretrieve
            # cannot resolve it as written. The base URL appears to have been
            # lost -- confirm the intended API host and prepend it here.
            url = f'{api}/catalog/datasets/plz_verzeichnis_{data}/exports/geojson'
            request.urlretrieve(url, file)
            self.stdout.write('Done!\n')
        return file

    @staticmethod
    def _extract_location(feature):
        """Return ``(plz, lat, lon)`` for one GeoJSON feature, or ``None``.

        ``None`` is returned when the feature lacks any of the required
        fields, when one of them is ``None``, or when the zip code cannot be
        converted to an integer, so that the caller can simply skip it.
        """
        try:
            properties = feature['properties']
            plz = properties['postleitzahl']
            point = properties['geo_point_2d']
            lat = point['lat']
            lon = point['lon']
        except (AttributeError, KeyError, TypeError):
            # KeyError: field missing; TypeError: a container is None or not
            # subscriptable with a string key.
            return None

        if plz is None or lat is None or lon is None:
            return None

        try:
            return int(plz), lat, lon
        except (TypeError, ValueError):
            # Non-numeric zip code: skip instead of aborting the whole run.
            return None

    def parse_data(self, file):
        """Parse the GeoJSON file and write the zip-code DataFrame pickle.

        Args:
            file: Path of the GeoJSON file to read (UTF-8 encoded).

        The resulting DataFrame is indexed by integer zip code and has the
        columns ``lat`` and ``lon``. If a zip code occurs more than once, the
        last occurrence in the file wins.
        """
        self.stdout.write('Opening file...')
        # Load the GeoJSON data for the zip codes
        with open(file, encoding='UTF-8') as f:
            full_data = json.load(f)
        self.stdout.write('Done!\n')

        self.stdout.write('Parsing file...')
        zip_dict = {}
        for feature in full_data['features']:
            location = self._extract_location(feature)
            if location is None:
                continue
            plz, lat, lon = location
            zip_dict[plz] = [lat, lon]
        self.stdout.write('Done!\n')

        # Passing ``columns`` here (instead of assigning ``df.columns``
        # afterwards) also works when no entry could be parsed.
        df = pd.DataFrame.from_dict(zip_dict, orient='index', columns=['lat', 'lon'])

        self.stdout.write('Saving pickle...')
        destination = os.path.join(BASE_DIR, 'pflaenzli', 'utils', 'plz.pkl')
        df.to_pickle(destination)
        self.stdout.write(f'Wrote pickle to {destination}\n')