MVP creating summary tables

This commit is contained in:
Hartmut Seichter 2024-05-27 21:09:36 +02:00
parent 7078c8255b
commit c64b2c2044
6 changed files with 82 additions and 37 deletions

View file

@ -8,8 +8,8 @@ actual values are kept in YAML files in order to version them with git.
## Usage
```sh
usage: [-h] [-m META [META ...]] [-l LANG] [-f FIELDS [FIELDS ...]] [-s SCHEMA] [-q QUERY] [-p] [--title TITLE] [-b BOOK] [--level LEVEL] [--table-gen TABLE_GEN]
[--template TEMPLATE] [-o OUT] [--legacy] [--leftcol LEFTCOL]
usage: [-h] [-m META [META ...]] [-l LANG] [-f FIELDS [FIELDS ...]] [-s SCHEMA] [-q QUERY] [-qs QUERY_SORT] [-qc QUERY_COMPOUND] [-qf QUERY_FILTER [QUERY_FILTER ...]]
[-p] [--title TITLE] [-b BOOK] [--level LEVEL] [--table-gen TABLE_GEN] [--template TEMPLATE] [-o OUT] [--legacy] [--leftcol LEFTCOL]
versatile curricula generator
@ -24,6 +24,12 @@ options:
using provided schema
-q QUERY, --query QUERY
compound query to select items
-qs QUERY_SORT, --query-sort QUERY_SORT
sort query with a min/max over a column like min:credits
-qc QUERY_COMPOUND, --query-compound QUERY_COMPOUND
create a compound from a column with multiple values/dictionaries in cells
-qf QUERY_FILTER [QUERY_FILTER ...], --query-filter QUERY_FILTER [QUERY_FILTER ...]
filter final list of columns for output
-p, --pagebreak add a pagebreak after each module
--title TITLE template for title - use curly brackets (i.e. {}) to mark where the title string is inserted
-b BOOK, --book BOOK process a whole curriculum book with sections

View file

@ -7,3 +7,4 @@
* [~] table generator
* [ ] overlay of compulsory with other modes ...
* [ ] add template based generator
* [ ] port over to structured YAML ... https://tolgee.io/platform/formats/structured_yaml

View file

@ -19,7 +19,6 @@ from tablegenerator import TableGenerator
from markdowngenerator import MarkdownGenerator
from templategenerator import TemplateGenerator
from schema import Schema
from query import Query
class CourseBuilder:
@ -47,10 +46,8 @@ class CourseBuilder:
if actual_fields == None:
actual_fields = list(schema.keys())
# in case we are running query mode
query = Query(args.query) if args.query else None
result = []
result_df = []
# iterate through meta files
for m in args.meta:
@ -66,12 +63,14 @@ class CourseBuilder:
add_pagebreak=args.pagebreak,
title_template=args.title,
first_colwidth=args.leftcol)
elif query:
print(schema.to_list_of_tuple(
elif args.query:
lot = schema.to_short_dict(
meta=yaml.load(fm,Loader=yaml.Loader),
fields=actual_fields,
lang=args.lang))
pass
lang=args.lang)
result_df.append(pd.DataFrame([lot]))
else:
MarkdownGenerator.generate_table(
table_items=schema.to_list_of_tuple(
@ -82,6 +81,43 @@ class CourseBuilder:
title_template=args.title,
first_colwidth=args.leftcol)
# query mode
if args.query and len(result_df):
# got the list
df = pd.concat(result_df,ignore_index=True)
# generate a dataframe
df_q = df.query(args.query)
# generate a compound column --query-compound column:sum
if args.query_compound:
df_q.loc[:,'form-of-instruction.sum'] = df_q['form-of-instruction'].apply(lambda x: sum(list(x.values())))
# --query-sort is parameterized as min:credits - hence direction:column
if args.query_sort:
qs = args.query_sort.split(':')
match qs[0]:
case 'min' : df_q = df_q.sort_values(by=qs[1],ascending=True,key=lambda col: min(col) if hasattr(col,'__len()__') else col)
case 'max' : df_q = df_q.sort_values(by=qs[1],ascending=False,key=lambda col: max(col) if hasattr(col,'__len()__') else col)
# filter query
if args.query_filter:
df_q = df_q.loc[:,args.query_filter]
# print(df_q.head())
q_as_md = df_q.to_markdown(tablefmt='grid',index=False)
print(q_as_md)
# # lets get crazy to create a summary table!
# df_summary = pd.DataFrame([{
# 'sum.credits': df_q['credits'].sum()
# }])
# print(df_summary.to_markdown(tablefmt='grid',index=False))
@staticmethod
def run():
@ -89,13 +125,20 @@ class CourseBuilder:
# arguments
parser = ArgumentParser(description='versatile curricula generator')
# parameters
# loading mode for internal database
parser.add_argument('-m','--meta',action="extend", nargs="+", type=str,help="course description(s) as YAML file(s)")
parser.add_argument('-l','--lang',help="Language to parse from meta file (use de or en)",default='de')
parser.add_argument('-f','--fields',help="Fields to be used, the table will be build accordingly",action="extend", nargs="+", type=str)
parser.add_argument('-s','--schema',help="using provided schema")
parser.add_argument('-q','--query',help="compound query to select items")
parser.add_argument('-s','--schema', help="using provided schema")
# query mode
parser.add_argument('-q','--query', type=str, default=None, help="compound query to select items")
parser.add_argument('-qs','--query-sort',type=str,default=None,help="sort query with a min/max over a column like min:credits")
parser.add_argument('-qc','--query-compound',type=str,default=None,help="create a compound from a column with multiple values/dictionaries in cells")
parser.add_argument('-qf','--query-filter',type=str,default=[],action="extend", nargs="+",help="filter final list of columns for output")
# create pagebreaks
parser.add_argument('-p','--pagebreak',action="store_true",help="add a pagebreak after each module")
parser.add_argument('--title',type=str,default=None,help="template for title - use curly brackets (i.e. {}) to mark where the title string is inserted")
parser.add_argument('-b','--book',type=str,help="process a whole curriculum book with sections")
@ -157,4 +200,7 @@ class CourseBuilder:
# run as main
if __name__ == '__main__':
    # switch on the pandas copy-on-write mode before any frame is created
    pd.set_option('mode.copy_on_write', True)
    # start the builder
    CourseBuilder.run()

View file

@ -1,20 +0,0 @@
import pandas as pd
class Query:
    """
    Holds a query string intended for pandas.DataFrame.query(),
    with room for the extras needed to generate curricula tables.
    """

    def __init__(self, query) -> None:
        # keep the raw query expression private to this class
        self.__query = query

    def run(self, table_items):
        """
        Placeholder: takes the table items, performs no work
        and returns None.
        """
        return None

View file

@ -28,7 +28,7 @@ class Schema:
match self.__schema[field]['type']:
case 'str': return meta[field][lang] if self.is_translatable(field) else meta[field]['value']
case 'enum' | 'int' | 'num' | 'multikey' : return meta[field]['value']
case 'multinum': return meta[field]['value'] if hasattr(meta[field]['value'],'__iter__') else (meta[field]['value'],) # force list!
case 'multinum': return meta[field]['value'] if hasattr(meta[field]['value'],'__iter__') else [meta[field]['value'],] # force list!
def to_list_of_dict(self,meta,fields,lang):
"""
@ -49,6 +49,15 @@ class Schema:
}
for field in fields]
def to_short_dict(self,meta,fields,lang):
    """
    Collects the values of the requested fields into one flat
    dictionary (field name -> value as returned by get_value),
    a shape that converts directly into a pandas dataframe row.
    """
    short = {}
    # resolve the fields one by one, keeping the requested order
    for name in fields:
        short[name] = self.get_value(meta,name,lang)
    return short
def to_list_of_tuple(self,meta,fields,lang):
"""
generates a list of tuples with a label and value (text)

View file

@ -33,6 +33,9 @@ debug:
# | pandoc ${target_flags} -V lang:de -o ${target_de}
debug-query:
python ${coursebuilder} -s schema.yaml -m mod.cg.yaml mod.interactsys.yaml -q "item['field'] == 'kind' and item['value'] == 'elective'"
python ${coursebuilder} -s schema.yaml -m mod.cg.yaml mod.interactsys.yaml -q "kind=='compulsory'" -qs min:credits -qc form-of-instruction -qf name id credits
debug-query-book:
python ${coursebuilder} -s schema.yaml -b book.yaml -q "kind=='compulsory'" -qs min:credits -qc form-of-instruction -qf name id credits
.PHONY: clean