logic to load/export datasets on basis of YAML instructions · larray-project/larray-editor@8790d3c · GitHub
[go: up one dir, main page]

Skip to content

Commit 8790d3c

Browse files
committed
logic to load/export datasets on basis of YAML instructions
1 parent 3bcdf44 commit 8790d3c

File tree

1 file changed

+108
-3
lines changed

1 file changed

+108
-3
lines changed

larray_editor/editor.py

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import io
22
import os
33
import re
4+
import yaml
45
from datetime import datetime
56
import sys
67
from collections.abc import Sequence
@@ -41,9 +42,9 @@
4142
from qtpy.QtCore import Qt, QUrl, QSettings
4243
from qtpy.QtGui import QDesktopServices, QKeySequence
4344
from qtpy.QtWidgets import (QMainWindow, QWidget, QListWidget, QListWidgetItem, QSplitter, QFileDialog, QPushButton,
44-
QDialogButtonBox, QShortcut, QVBoxLayout, QGridLayout, QLineEdit,
45+
QDialogButtonBox, QShortcut, QVBoxLayout, QHBoxLayout, QGridLayout, QLineEdit,
4546
QCheckBox, QComboBox, QMessageBox, QDialog, QInputDialog, QLabel, QGroupBox, QRadioButton,
46-
QTreeView)
47+
QTreeView, QTextEdit, QMenu, QAction)
4748

4849
try:
4950
from qtpy.QtWidgets import QUndoStack
@@ -473,7 +474,7 @@ def initUI(self):
473474

474475
# Final button to proceed with the main task and optionally generate a copy
475476
self.proceedButton = QPushButton("Proceed", self)
476-
self.proceedButton.clicked.connect(self.proceed_and_viewdata)
477+
self.proceedButton.clicked.connect(self.proceedWithTasks)
477478
layout.addWidget(self.proceedButton)
478479

479480

@@ -510,6 +511,110 @@ def selectPath(self):
510511
self.selectedPath, _ = QFileDialog.getSaveFileName(self, "Select Path", "", "All Files (*)", options=options)
511512

512513

514+
def proceedWithTasks(self):
    """Load every indicator listed in the YAML configuration and optionally export it.

    For each indicator code found in ``self.yaml_content['indicators']`` the dataset
    is fetched with ``eurostat_get``, reshaped by ``extract_mat`` according to that
    indicator's YAML entry, stored in a copy of the editor's data mapping and pushed
    into the console namespace. Only the last indicator is displayed in the viewer.

    When ``generateCopyCheckBox`` is checked, every array is also written below
    ``self.yaml_content['dbdir']`` in the format selected in ``fileFormatComboBox``.
    The editor mapping is updated once at the end and the dialog is accepted.
    """

    def split_labels(text):
        """Split a comma-separated string, tolerating blanks around the commas."""
        return [part.strip() for part in text.split(',')]

    def intersect(a, b):
        """Return the items of `a` that are also in `b`, keeping the order of `a`.

        `a` may be a comma-separated string, a dict (its keys are used) or any
        other iterable of labels.
        """
        if isinstance(a, str):
            a = split_labels(a)
        # Iterating a dict yields its keys, so no special case is needed for it.
        return [val for val in a if val in b]

    def extract_mat(mat, keys):
        """Select, relabel and reorder `mat` following the YAML entry `keys`.

        The special entry ``'name'`` is a comma-separated list giving the final axis
        order; names in it that `mat` does not have yet are added as new axes. Every
        other entry is treated as an axis name mapped to the labels to keep: a
        comma-separated string, an iterable, or a dict ``{old_label: new_label}``.
        In a dict, a ``None`` value means "keep the label, then select it on its own".
        """
        intersect_keys = {}
        replace_keys = {}
        name_keys = []
        new_keys = []
        none_keys = []

        for axis_name, spec in keys.items():
            if axis_name == 'name':
                # name: defines the order and the naming convention
                name_keys = split_labels(spec)
                new_keys = [n for n in name_keys if n not in mat.axes.names]
                continue

            # Keep only the requested labels that actually exist on this axis.
            common = intersect(spec, mat.axes[axis_name].labels)
            intersect_keys[axis_name] = common

            if isinstance(spec, dict):
                # Build the old -> new label mapping in one pass instead of
                # patching the dict while iterating over it.
                mapping = {}
                for label in common:
                    new_label = spec[label]
                    if new_label is None:
                        none_keys.append(label)
                        new_label = label
                    mapping[label] = new_label
                replace_keys[axis_name] = mapping

        mat = mat[intersect_keys]

        # replace labels
        if replace_keys:
            mat = mat.set_labels(replace_keys)

        # NOTE(review): selecting a single label presumably drops its axis; several
        # None-valued labels on the same axis would then fail -- confirm intent.
        for label in none_keys:
            mat = mat[label]

        # add missing dimensions
        for axis_name in new_keys:
            mat = la.stack({axis_name: mat}, axis_name)

        # put in correct order
        if name_keys:
            mat = mat.transpose(name_keys)
        return mat

    # Read the export settings once. The UI choice is used as the most recent
    # information (rather than a format stored in the YAML file), so that a loaded
    # YAML file can later be exported to another format through the dropdown.
    export = self.generateCopyCheckBox.isChecked()
    file_format = None
    db_dir = None
    if export:
        file_format = self.fileFormatComboBox.currentText()
        db_dir = self.yaml_content['dbdir']

    # There might be multiple datasets in the YAML config. Two different tasks:
    # 1) add every dataset to the editor as an available larray object, and
    # 2) view a dataset graphically in the UI. Viewing them all sequentially would
    # be redundant, so only the last one is viewed, but all of them are added.
    pm = self.yaml_content
    indicators = list(pm['indicators'])
    last_code = indicators[-1] if indicators else None

    # Access to the datasets already present in the editor.
    editor = self.parent().parent()  # need to be *two* class-levels higher
    new_data = editor.data.copy()  # work on a copy of the dataset mapping

    # Imported here rather than at module level (presumably to keep the dependency
    # optional -- confirm).
    from larray_eurostat import eurostat_get

    for code in indicators:
        arr = eurostat_get(code, cache_dir='__array_cache__')  # pulls dataset
        arr = extract_mat(arr, pm['indicators'][code])  # relabels via YAML config

        new_data[code] = arr  # add dataset for the editor update
        editor.kernel.shell.user_ns[code] = arr  # add dataset to console namespace

        # Viewing datasets: only the last indicator of the YAML config
        if code == last_code:
            editor.view_expr(arr, expr=code)

        if not export:
            continue

        if file_format == 'csv':
            arr.to_csv(f'{db_dir}/{code}.csv')
        elif file_format == 'ui':
            # TODO: opening the array in a viewer is not implemented yet
            pass
        elif file_format in ['var', 'iode']:
            # TODO: export to IODE 'var' files is not implemented yet
            pass
        elif file_format[:3] == 'xls':
            arr.to_excel(f'{db_dir}/{code}.xlsx')
        else:
            # to_hdf needs the key under which the array is stored in the file;
            # the indicator code is used for it.
            arr.to_hdf(f'{db_dir}/{code}', code)

    # Update the mapping once, after the loop, instead of once per dataset.
    editor.update_mapping(new_data)
    self.accept()
616+
617+
513618

514619
class AbstractEditor(QMainWindow):
515620
"""Abstract Editor Window"""

0 commit comments

Comments
 (0)
0