MVP creating summary tables

This commit is contained in:
Hartmut Seichter 2024-05-27 21:09:36 +02:00
parent 7078c8255b
commit c64b2c2044
6 changed files with 82 additions and 37 deletions

View file

@ -8,8 +8,8 @@ actual values are kept in YAML files in order to version them with git.
## Usage ## Usage
```sh ```sh
usage: [-h] [-m META [META ...]] [-l LANG] [-f FIELDS [FIELDS ...]] [-s SCHEMA] [-q QUERY] [-p] [--title TITLE] [-b BOOK] [--level LEVEL] [--table-gen TABLE_GEN] usage: [-h] [-m META [META ...]] [-l LANG] [-f FIELDS [FIELDS ...]] [-s SCHEMA] [-q QUERY] [-qs QUERY_SORT] [-qc QUERY_COMPOUND] [-qf QUERY_FILTER [QUERY_FILTER ...]]
[--template TEMPLATE] [-o OUT] [--legacy] [--leftcol LEFTCOL] [-p] [--title TITLE] [-b BOOK] [--level LEVEL] [--table-gen TABLE_GEN] [--template TEMPLATE] [-o OUT] [--legacy] [--leftcol LEFTCOL]
versatile curricula generator versatile curricula generator
@ -24,6 +24,12 @@ options:
using provided schema using provided schema
-q QUERY, --query QUERY -q QUERY, --query QUERY
compound query to select items compound query to select items
-qs QUERY_SORT, --query-sort QUERY_SORT
sort query with a min/max over a column like min:credits
-qc QUERY_COMPOUND, --query-compound QUERY_COMPOUND
create a compound from a column with multiple values/dictionaries in cells
-qf QUERY_FILTER [QUERY_FILTER ...], --query-filter QUERY_FILTER [QUERY_FILTER ...]
filter final list of columns for output
-p, --pagebreak add a pagebreak after each module -p, --pagebreak add a pagebreak after each module
--title TITLE template for title - use curly brackets (i.e. {}) to mark where the title string is inserted --title TITLE template for title - use curly brackets (i.e. {}) to mark where the title string is inserted
-b BOOK, --book BOOK process a whole curriculum book with sections -b BOOK, --book BOOK process a whole curriculum book with sections

View file

@ -6,4 +6,5 @@
* [x] add a book mode for mixing input and headers (# Blah -m mod.cg.yaml) * [x] add a book mode for mixing input and headers (# Blah -m mod.cg.yaml)
* [~] table generator * [~] table generator
* [ ] overlay of compulsory with other modes ... * [ ] overlay of compulsory with other modes ...
* [ ] add template based generator * [ ] add template based generator
* [ ] port over to structured YAML ... https://tolgee.io/platform/formats/structured_yaml

View file

@ -19,7 +19,6 @@ from tablegenerator import TableGenerator
from markdowngenerator import MarkdownGenerator from markdowngenerator import MarkdownGenerator
from templategenerator import TemplateGenerator from templategenerator import TemplateGenerator
from schema import Schema from schema import Schema
from query import Query
class CourseBuilder: class CourseBuilder:
@ -47,10 +46,8 @@ class CourseBuilder:
if actual_fields == None: if actual_fields == None:
actual_fields = list(schema.keys()) actual_fields = list(schema.keys())
# in case we are running query mode
query = Query(args.query) if args.query else None
result = [] result_df = []
# iterate through meta files # iterate through meta files
for m in args.meta: for m in args.meta:
@ -66,12 +63,14 @@ class CourseBuilder:
add_pagebreak=args.pagebreak, add_pagebreak=args.pagebreak,
title_template=args.title, title_template=args.title,
first_colwidth=args.leftcol) first_colwidth=args.leftcol)
elif query: elif args.query:
print(schema.to_list_of_tuple(
lot = schema.to_short_dict(
meta=yaml.load(fm,Loader=yaml.Loader), meta=yaml.load(fm,Loader=yaml.Loader),
fields=actual_fields, fields=actual_fields,
lang=args.lang)) lang=args.lang)
pass
result_df.append(pd.DataFrame([lot]))
else: else:
MarkdownGenerator.generate_table( MarkdownGenerator.generate_table(
table_items=schema.to_list_of_tuple( table_items=schema.to_list_of_tuple(
@ -82,6 +81,43 @@ class CourseBuilder:
title_template=args.title, title_template=args.title,
first_colwidth=args.leftcol) first_colwidth=args.leftcol)
# query mode: merge the records collected per meta file into one table, then
# filter rows, derive a compound column, sort, and project columns for output
if args.query and result_df:
    # stack the one-row frames gathered in the loop above into one dataframe
    df = pd.concat(result_df, ignore_index=True)

    # keep only the rows matching the --query expression; take a copy so the
    # derived column below is never written into a slice of `df`
    df_q = df.query(args.query).copy()

    # --query-compound names the column to aggregate (the name used to be
    # hard-coded to 'form-of-instruction'); every cell holding a dictionary
    # or a list is summed into a new '<column>.sum' column
    if args.query_compound:
        compound = args.query_compound

        def cell_sum(cell):
            # dictionaries contribute their values, sequences their items,
            # anything else is passed through unchanged
            if isinstance(cell, dict):
                return sum(cell.values())
            if isinstance(cell, (list, tuple)):
                return sum(cell)
            return cell

        df_q[f'{compound}.sum'] = df_q[compound].apply(cell_sum)

    # --query-sort is parameterized as direction:column, e.g. min:credits
    if args.query_sort:
        direction, _, column = args.query_sort.partition(':')
        pick = {'min': min, 'max': max}.get(direction)
        # any direction other than min/max (or a missing column) leaves the
        # row order untouched
        if pick is not None and column:
            df_q = df_q.sort_values(
                by=column,
                ascending=(direction == 'min'),
                # the key callable receives the whole column and has to return
                # a Series of the same shape, hence multi-valued cells are
                # reduced to their min/max one by one
                key=lambda col: col.map(
                    lambda cell: pick(cell)
                    if isinstance(cell, (list, tuple)) and cell else cell))

    # --query-filter restricts the output to the listed columns (in that order)
    if args.query_filter:
        df_q = df_q.loc[:, args.query_filter]

    # emit the result as a grid table without the dataframe index
    print(df_q.to_markdown(tablefmt='grid', index=False))

    # TODO: add a summary table below the result, e.g. a one-row dataframe
    # holding 'sum.credits' = df_q['credits'].sum(), printed the same way
@staticmethod @staticmethod
def run(): def run():
@ -89,13 +125,20 @@ class CourseBuilder:
# arguments # arguments
parser = ArgumentParser(description='versatile curricula generator') parser = ArgumentParser(description='versatile curricula generator')
# parameters # loading mode for internal database
parser.add_argument('-m','--meta',action="extend", nargs="+", type=str,help="course description(s) as YAML file(s)") parser.add_argument('-m','--meta',action="extend", nargs="+", type=str,help="course description(s) as YAML file(s)")
parser.add_argument('-l','--lang',help="Language to parse from meta file (use de or en)",default='de') parser.add_argument('-l','--lang',help="Language to parse from meta file (use de or en)",default='de')
parser.add_argument('-f','--fields',help="Fields to be used, the table will be build accordingly",action="extend", nargs="+", type=str) parser.add_argument('-f','--fields',help="Fields to be used, the table will be build accordingly",action="extend", nargs="+", type=str)
parser.add_argument('-s','--schema',help="using provided schema") parser.add_argument('-s','--schema', help="using provided schema")
parser.add_argument('-q','--query',help="compound query to select items")
# query mode
parser.add_argument('-q','--query', type=str, default=None, help="compound query to select items")
parser.add_argument('-qs','--query-sort',type=str,default=None,help="sort query with a min/max over a column like min:credits")
parser.add_argument('-qc','--query-compound',type=str,default=None,help="create a compound from a column with multiple values/dictionaries in cells")
parser.add_argument('-qf','--query-filter',type=str,default=[],action="extend", nargs="+",help="filter final list of columns for output")
# create pagebreaks
parser.add_argument('-p','--pagebreak',action="store_true",help="add a pagebreak after each module") parser.add_argument('-p','--pagebreak',action="store_true",help="add a pagebreak after each module")
parser.add_argument('--title',type=str,default=None,help="template for title - use curly brackets (i.e. {}) to mark where the title string is inserted") parser.add_argument('--title',type=str,default=None,help="template for title - use curly brackets (i.e. {}) to mark where the title string is inserted")
parser.add_argument('-b','--book',type=str,help="process a whole curriculum book with sections") parser.add_argument('-b','--book',type=str,help="process a whole curriculum book with sections")
@ -157,4 +200,7 @@ class CourseBuilder:
# run as main # run as main
if __name__ == '__main__': if __name__ == '__main__':
# recommended setting for pandas
pd.options.mode.copy_on_write = True
# run
CourseBuilder.run() CourseBuilder.run()

View file

@ -1,20 +0,0 @@
import pandas as pd
class Query:
    """
    Thin wrapper around a pandas.DataFrame.query() expression, meant to
    carry the special additions needed for generating curricula tables
    """

    def __init__(self, query) -> None:
        # keep the raw query expression; nothing is parsed at this point
        self.__query = query

    def run(self, table_items):
        # placeholder: the stored query is not evaluated against table_items
        # yet, so the items are ignored and nothing is returned
        return None

View file

@ -28,7 +28,7 @@ class Schema:
match self.__schema[field]['type']: match self.__schema[field]['type']:
case 'str': return meta[field][lang] if self.is_translatable(field) else meta[field]['value'] case 'str': return meta[field][lang] if self.is_translatable(field) else meta[field]['value']
case 'enum' | 'int' | 'num' | 'multikey' : return meta[field]['value'] case 'enum' | 'int' | 'num' | 'multikey' : return meta[field]['value']
case 'multinum': return meta[field]['value'] if hasattr(meta[field]['value'],'__iter__') else (meta[field]['value'],) # force list! case 'multinum': return meta[field]['value'] if hasattr(meta[field]['value'],'__iter__') else [meta[field]['value'],] # force list!
def to_list_of_dict(self,meta,fields,lang): def to_list_of_dict(self,meta,fields,lang):
""" """
@ -49,6 +49,15 @@ class Schema:
} }
for field in fields] for field in fields]
def to_short_dict(self, meta, fields, lang):
    """
    builds a flat dict mapping every requested field to its value as
    returned by get_value() - a single record of this shape can be
    turned into a one-row pandas dataframe
    """
    record = {}
    # resolve the fields one by one, keeping the order they were requested in
    for name in fields:
        record[name] = self.get_value(meta, name, lang)
    return record
def to_list_of_tuple(self,meta,fields,lang): def to_list_of_tuple(self,meta,fields,lang):
""" """
generates a list of tuples with a label and value (text) generates a list of tuples with a label and value (text)

View file

@ -33,6 +33,9 @@ debug:
# | pandoc ${target_flags} -V lang:de -o ${target_de} # | pandoc ${target_flags} -V lang:de -o ${target_de}
debug-query: debug-query:
python ${coursebuilder} -s schema.yaml -m mod.cg.yaml mod.interactsys.yaml -q "item['field'] == 'kind' and item['value'] == 'elective'" python ${coursebuilder} -s schema.yaml -m mod.cg.yaml mod.interactsys.yaml -q "kind=='compulsory'" -qs min:credits -qc form-of-instruction -qf name id credits
debug-query-book:
python ${coursebuilder} -s schema.yaml -b book.yaml -q "kind=='compulsory'" -qs min:credits -qc form-of-instruction -qf name id credits
.PHONY: clean .PHONY: clean