%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CC in Polish VCs
% author: AK
% version 25.06.2000
%
% arguments can be realized in any order and in any number
% due to the phrase structure rule component of ALE
% the lexical head must always precede its complements, e.g.,
% the verb precedes its complements: OK lubi marysie but * marysie lubi
% postverbal clitics precede non-clitic arguments
% disambiguated wrt the order of clitics
%
% the grammar was tested on the following examples:
%
% janek moze lubic go                 % 1 solution
% janek go lubi                       % 0 solutions: the head must precede complements
% janek dal mu kwiaty                 % 1 solution
% kwiaty                              % 2 solutions: kwiaty is either NP[nom] or NP[acc]
% janek moze go lubic                 % 1 solution
% janek kazal mu lubic go             % 1 solution
% janek kazal mu go lubic             % 1 solution
% janek kazal marysi go lubic         % 0 solutions: raised non-clitic
%                                     % `marysi' precedes raised clitic `go'
% janek kazal go marysi lubic         % 1: go is raised and it precedes marysi
%                                     % NP[dat] complement of `kazal'
% janek powiedzial ze marysia lubi go % 1
% janek obiecal mu lubic go           % 1: no CC; `mu' is a
%                                     % NP[dat] argument of `obiecal'
% janek obiecal mu go lubic           % 0: (OK, no raising to a subject
%                                     % control verb with NP[dat] complement)
% janek moze mu dac go                % 1: clitics split
% janek idzie                         % 1: N + V
% janek boi sie go                    % 1
% janek moze sie go bac               % 1: the RM and `go' are raised
% janek kazal mu go dac marysi        % 1: `go' is raised, `mu' is NP[dat] complement of
%                                     % `kazal', NP `marysi' is not raised
% janek kazal mu sie bac go           % 1: the RM is raised
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Signature
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Top of the type hierarchy: the immediate subtypes of bot.
bot sub [sign_t, mod_synsem_t, list, local_t, hd_str_t, case_t,
         category_t, content_t, head_t, marking_t, marker_t, vform_t,
         pform_t, bool_t, index_t, pers_t, numb_t, gend_t].

% Signs are words or phrases; every sign carries a SYNSEM value.
sign_t sub [phrase_t, word_t]
       intro [synsem: synsem_t].

word_t sub [].

% Only phrases have daughters.
phrase_t sub []
         intro [dtrs: hd_str_t].

% Constraint attached to every sign: it must satisfy single_subj/1.
sign_t cons Sign
  goal (single_subj(Sign)).
% Constraint attached to every phrase: all four principles below must hold
% of the phrase.
phrase_t cons Phrase
  goal (non_clitic_phrases(Phrase),
        hfp(Phrase),
        marking_principle(Phrase),
        sem_principle(Phrase)).

% --- daughter structures -------------------------------------------------
hd_str_t sub [hd_mk_t, hd_adj_t, hd_comps_t]
         intro [hd_dtr: sign_t, non_hd_dtrs: sign_list].
hd_mk_t sub []
        intro [mk_dtr: word_t, non_hd_dtrs: e_list].
hd_adj_t sub []
         intro [adj_dtr: sign_t, non_hd_dtrs: e_list].
hd_comps_t sub [].

% --- lists ---------------------------------------------------------------
% Typed lists: lists of signs, lists of synsems, and - among the latter -
% lists containing only clitic or only non-clitic synsems.
list sub [e_list, ne_list, sign_list, synsem_list, non_clitic_list, clitic_list].
e_list sub [].
ne_list sub [ne_sign_list, ne_synsem_list]
        intro [hd: bot, tl: list].

sign_list sub [e_list, ne_sign_list].
ne_sign_list sub []
             intro [hd: sign_t, tl: sign_list].

synsem_list sub [e_list, ne_synsem_list, non_clitic_list, clitic_list].
ne_synsem_list sub [ne_clitic_list, ne_non_clitic_list]
               intro [hd: synsem_t, tl: synsem_list].

non_clitic_list sub [e_list, ne_non_clitic_list].
ne_non_clitic_list sub []
                   intro [hd: non_clitic_t, tl: non_clitic_list].

clitic_list sub [e_list, ne_clitic_list].
ne_clitic_list sub []
               intro [hd: clitic_t, tl: clitic_list].

% --- synsem / local ------------------------------------------------------
synsem_t sub [clitic_t, non_clitic_t]   % non_canonical is not used
         intro [loc: local_t].
clitic_t sub [].
non_clitic_t sub [].

local_t sub []
        intro [cat: category_t, cont: content_t].

% --- content -------------------------------------------------------------
content_t sub [nom_obj_t, psoa_t].
nom_obj_t sub [npro_t, pron_t]
          intro [index: index_t].
npro_t sub [].
pron_t sub [ppro_t, ana_t].
ppro_t sub [].
ana_t sub [refl_t, recp_t].
refl_t sub [].
recp_t sub [].
psoa_t sub [].

% --- indices -------------------------------------------------------------
% The maximal types below were previously only mentioned as subtypes; they
% are now declared explicitly, like every other maximal type in this
% signature.
index_t sub [ref_t, non_ref_t]
        intro [person: pers_t, number: numb_t, gender: gend_t].
ref_t sub [].
non_ref_t sub [].

pers_t sub [first_t, second_t, third_t].
first_t sub [].
second_t sub [].
third_t sub [].

numb_t sub [sing_t, plur_t].
sing_t sub [].
plur_t sub [].

gend_t sub [fem_t, masc_t, neut_t, pl_gend_t].
fem_t sub [].
masc_t sub [].
neut_t sub [].
pl_gend_t sub [].

% --- category ------------------------------------------------------------
category_t sub []
           intro [head: head_t,
                  subj: synsem_list,
                  comps: synsem_list,
                  marking: marking_t].

marking_t sub [marked_t, unmarked_t].
marked_t sub [ze_t, zeby_t].
ze_t sub [].
zeby_t sub [].
unmarked_t sub [].

% --- heads ---------------------------------------------------------------
head_t sub [subst_t, marker_t].
subst_t sub [noun_t, adj_t, verb_t, prep_t, adv_t]
        intro [mod: mod_synsem_t].

% Value of MOD: either a synsem (the modified element) or none_t.
mod_synsem_t sub [synsem_t, none_t].
none_t sub [].
% --- head subtypes (continued) --------------------------------------------
% Nouns and verbs never modify (MOD is none_t); adjectives and adverbs
% always have a synsem as their MOD value.
noun_t sub []
       intro [case: case_t, mod: none_t].

case_t sub [nom_t, gen_t, dat_t, acc_t, loc_t, instr_t].
nom_t sub [].
gen_t sub [].
dat_t sub [].
acc_t sub [].
loc_t sub [].
instr_t sub [].

adj_t sub []
      intro [mod: synsem_t, pred: bool_t].

bool_t sub [yes_t, no_t].
yes_t sub [].
no_t sub [].

adv_t sub []
      intro [mod: synsem_t].

verb_t sub []
       intro [vform: vform_t, mod: none_t].

vform_t sub [inf_t, fin_t].
inf_t sub [].
fin_t sub [].

prep_t sub []
       intro [pform: pform_t].

pform_t sub [na_t, na_loc_t, w_t].
na_loc_t sub [].
na_t sub [].
w_t sub [].

marker_t sub []
         intro [spec: synsem_t].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Phrase Structure Rules
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% schema1: subject + head.
% The single non-head daughter comes first; its SYNSEM is the element that
% subj_val/3 prepends to the mother's SUBJ to obtain the head daughter's
% SUBJ.  Mother and head daughter share an empty COMPS list.
schema1 rule
(Mother, synsem:loc:cat:comps: (Comps, []),
         dtrs: (hd_comps_t,
                hd_dtr: HeadDtr,
                non_hd_dtrs: [SubjDtr]))
===>
cat> (SubjDtr, synsem:SubjSynsem),                   % phrase_t in HPSG94
cat> (HeadDtr, synsem:loc:cat:comps: (Comps, [])),   % phrase_t in HPSG94
goal> (subj_val(Mother, [SubjSynsem], HeadDtr)).

% schema2: lexical head + some of its complements.
% The head word comes first, followed by a non-empty selection of its
% complements; whatever is not realized stays on the mother's COMPS.
schema2 rule
(Mother, phrase_t,
         dtrs: (hd_comps_t,
                hd_dtr: HeadDtr,
                non_hd_dtrs: CompDtrs))
===>
cat> (HeadDtr, word_t, synsem:loc:cat:comps:Comps),
goal> (at_most_three(Comps),                        % restricts the length of the Comps list
       shuffle((Realized, ne_list), Rest, Comps),   % takes any ordered subset of Comps
       split(Realized),                             % splits Realized into clitics and non-clitics;
                                                    % clitics must precede non-clitics
       synsems_to_signs(Realized, CompDtrs)),       % maps synsems (Realized) to signs
cats> CompDtrs,
goal> (comps(Mother, Rest),                         % gives the value of mother's COMPS
       subj_val(Mother, [], HeadDtr)).              % gives the value of mother's SUBJ

% schema4: marker + head.
% The marker word comes first; its SPEC value is the SYNSEM of the phrasal
% head that follows.  COMPS and SUBJ are passed up from the head unchanged
% (nothing is discharged: the lists handed to comps_val/3 and subj_val/3
% are empty).
schema4 rule
(Mother, phrase_t,
         dtrs: (hd_mk_t,
                mk_dtr: MarkerDtr,
                hd_dtr: HeadDtr))
===>
cat> (MarkerDtr, word_t, synsem:loc:cat:(head:(marker_t, spec: HeadSynsem))),
cat> (HeadDtr, phrase_t, synsem:HeadSynsem),
goal> (comps_val(Mother, HeadDtr, []),
       subj_val(Mother, [], HeadDtr)).
% schema5: adjunct + head.
% The adjunct comes first; its MOD value is the (non-clitic) SYNSEM of the
% head that follows.  COMPS and SUBJ are passed up from the head unchanged.
schema5 rule
(Mother, phrase_t,
         dtrs: (hd_adj_t,
                hd_dtr: HeadDtr,
                adj_dtr: AdjunctDtr))
===>
cat> (AdjunctDtr, sign_t,                            % phrase_t in HPSG94
      synsem:loc:cat:head:mod: (HeadSynsem, non_clitic_t)),
cat> (HeadDtr, sign_t,                               % phrase_t in HPSG94
      synsem: HeadSynsem),
goal> (comps_val(Mother, HeadDtr, []),
       subj_val(Mother, [], HeadDtr)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% definite clauses
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% synsems_to_signs/2 relates a list of synsems to a list of signs of the
% same length whose SYNSEM values are those synsems, in the same order.
synsems_to_signs([], []) if true.
synsems_to_signs([Synsem|Synsems], [(sign_t, synsem:Synsem)|Signs]) if
  synsems_to_signs(Synsems, Signs).

% append/3: the usual list concatenation.
append([], Ys, Ys) if true.
append([X|Xs], Ys, [X|Zs]) if
  append(Xs, Ys, Zs).

% at_most_three/1 holds of lists with one, two or three elements
% (the argument must have a TL, so the empty list is not accepted).
at_most_three((tl: ([]; (tl: ([]; tl:[]))))) if true.

% shuffle/3 gets an arbitrary subset of Ys with Rest as the remaining list;
% the elements of the subset may be picked from Ys in any order
shuffle([X|Xs], Rest, Ys) if
  select(X, Ys, Ys1),
  shuffle(Xs, Rest, Ys1).
shuffle([], Ys, Ys) if true.

% split/1 splits List into clitic and non-clitics lists;
% clitics precede non-clitics
split(List) if
  append(clitic_list, non_clitic_list, List).

% removes one element from a list; the usual Prolog select/3 predicate
select(Z, [Z|Rest], Rest) if true.
select(Z, [Y|Ys], [Y|Zs]) if
  select(Z, Ys, Zs).

% clitics are not projected to phrases (HD-DTR|SYNSEM is always a non-clitic) and
% phrases have non-clitic synsem
%
non_clitic_phrases((phrase_t,
                    synsem: non_clitic_t,
                    dtrs:hd_dtr:synsem: non_clitic_t)) if true.

% Head Feature Principle: a phrase shares its HEAD value with its head daughter
hfp((synsem:loc:cat:head:Head,
     dtrs:hd_dtr:synsem:loc:cat:head:Head)) if true.

% comps/2: the second argument is the COMPS value of the first.
comps((synsem:loc:cat:comps: Comps), Comps) if true.

% comps_val/3: the head's COMPS is Discharged followed by the mother's COMPS.
comps_val((synsem:loc:cat:comps: MotherComps),
          (synsem:loc:cat:comps: HeadComps),
          Discharged) if
  append(Discharged, MotherComps, HeadComps).

% subj_val/3: the head's SUBJ is Discharged followed by the mother's SUBJ.
subj_val((synsem:loc:cat:subj: MotherSubj),
         Discharged,
         (synsem:loc:cat:subj: HeadSubj)) if
  append(Discharged, MotherSubj, HeadSubj).

% single_subj/1: the SUBJ list is empty or has exactly one element.
single_subj((synsem:loc:cat:subj: ([]; tl:[]))) if true.

% Marking principle: in a head-marker structure the mother's MARKING is that
% of the marker daughter; in head-complement and head-adjunct structures it
% is that of the head daughter.
marking_principle((  dtrs:(hd_mk_t, mk_dtr:synsem:loc:cat:marking:Marking),
                     synsem:loc:cat:marking:Marking
                  ;  dtrs:((hd_comps_t; hd_adj_t), hd_dtr:synsem:loc:cat:marking:Marking),
                     synsem:loc:cat:marking:Marking
                  )) if true.
% Semantics principle: in a head-adjunct structure the mother's CONT is that
% of the adjunct daughter; in head-complement and head-marker structures it
% is that of the head daughter.
sem_principle((  dtrs:(hd_adj_t, adj_dtr:synsem:loc:cont:Cont),
                 synsem:loc:cont:Cont
              ;  dtrs:((hd_comps_t; hd_mk_t), hd_dtr:synsem:loc:cont:Cont),
                 synsem:loc:cont:Cont
              )) if true.

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% macros
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%

% noun/1: saturated, unmarked category whose head has CASE Case.
noun(Case) macro
  loc:cat:(head: case: Case,
           subj: [],
           comps: [],
           marking: unmarked_t).

% noun/2: as noun/1, additionally conjoined with the description Synsem
% (used in this file to pass a synsem type such as clitic_t or non_clitic_t).
noun(Synsem, Case) macro
  (Synsem,
   loc:cat:(head: case: Case,
            subj: [],
            comps: [],
            marking: unmarked_t)).

% nps/2: a synsem_t with the noun/1 category and the given INDEX.
nps(Case, Index) macro
  (synsem_t,
   loc:(cat:(head: case: Case,
             subj: [],
             comps: [],
             marking: unmarked_t),
        cont:index: Index)).

% npnc/2: as nps/2, but restricted to non-clitic synsems.
npnc(Case, Index) macro
  (non_clitic_t,
   loc:(cat:(head: case: Case,
             subj: [],
             comps: [],
             marking: unmarked_t),
        cont:index: Index)).

% Any substantive head type other than verb_t.
non_verb macro
  (noun_t; adj_t; prep_t; adv_t).

non_verb_synsem macro
  loc:cat:head: @non_verb.

% thsg/1: third person singular index description with the given gender.
thsg(Gend) macro
  person: third_t,
  number: sing_t,
  gender: Gend.

% adjp: saturated predicative adjective.
adjp macro
  loc:cat:(head:(adj_t, pred: yes_t),
           subj: [],
           comps: []).

% s/2: saturated non-clitic verbal category with the given VFORM and MARKING.
s(Vform, Marking) macro
  non_clitic_t,
  loc:cat:(head:vform:Vform,
           subj: [],
           comps: [],
           marking: Marking).

% prep/2: saturated, unmarked prepositional category.
prep(Pform, Mod) macro
  loc:cat:(head:(pform:Pform, mod:Mod),
           subj: [],
           comps: [],
           marking: unmarked_t).

% vinf: infinitival category with one unrealized subject and no complements.
vinf macro
  non_clitic_t,          % no clitic verbs considered
  loc:cat:(head:vform:inf_t,
           subj: [@noun(_)],
           comps: [],
           marking: unmarked_t).

% inf_comp/2: infinitival complement whose unrealized subject has index
% Index and whose remaining COMPS list is List.
inf_comp(Index, List) macro
  non_clitic_t,
  loc:cat:(head:vform:inf_t,
           subj: [@nps(_, Index)],
           comps: List,
           marking: unmarked_t).

% inf_no_rm_comp/2: as inf_comp/2, but List may contain at most three
% elements, each of which satisfies @non_rm.
inf_no_rm_comp(Index, List) macro
  non_clitic_t,
  loc:cat:(head:vform:inf_t,
           subj: [@nps(_, Index)],
           comps: (List, (  []
                         ;  [@non_rm]
                         ;  [@non_rm, @non_rm]
                         ;  [@non_rm, @non_rm, @non_rm])),
           marking: unmarked_t).

% no_rm_list: a list of zero to three @non_rm elements.
no_rm_list macro
     []
  ;  [@non_rm]
  ;  [@non_rm, @non_rm]
  ;  [@non_rm, @non_rm, @non_rm].

% rm: the RM - a clitic, accusative, saturated nominal with anaphoric content.
rm macro
  clitic_t,
  loc:(cat:(head: case: acc_t,
            subj: [],
            comps: [],
            marking: unmarked_t),
       cont: ana_t).

% non_rm/1: a non-clitic NP in the given case, or a clitic personal pronoun
% in that case.
non_rm(Case) macro
  (  @npnc(Case, _)
  ;  (@noun(clitic_t, Case), loc:cont: ppro_t)).

% non_rm/0: any non-clitic synsem, or a clitic personal pronoun in any case.
non_rm macro
  (  non_clitic_t
  ;  (@noun(clitic_t, _), loc:cont: ppro_t)).
%%%%%%%%%%%%%%%%%%%%%%%%%
%
% a simple lexicon
%
%%%%%%%%%%%%%%%%%%%%%%%%%

% --- nominals ------------------------------------------------------------

% Typed non_clitic_t like the other full NPs below; with an underspecified
% synsem type split/1 could treat this word either as a clitic or as a
% non-clitic.
janek --->
  word_t,
  synsem: @noun(non_clitic_t, nom_t).

marysi --->
  word_t,
  synsem: @npnc((dat_t; gen_t), @thsg(fem_t)).

mu --->
  word_t,
  synsem: (clitic_t, @nps(dat_t, @thsg(masc_t))).

go --->
  word_t,
  synsem: (clitic_t, @nps((acc_t; gen_t), @thsg(masc_t))).

sie --->
  word_t,
  synsem: @rm.

marysia --->
  word_t,
  synsem: @noun(non_clitic_t, nom_t).

marysie --->
  word_t,
  synsem: @noun(non_clitic_t, acc_t).

stole --->
  word_t,
  synsem: @noun(non_clitic_t, loc_t).

kwiaty --->
  word_t,
  synsem: (non_clitic_t, @noun((acc_t;nom_t))).

% --- verbs ---------------------------------------------------------------

idzie --->
  word_t,
  synsem:loc:(cat:(head:vform:fin_t,
                   subj: [@noun(nom_t)],
                   comps: [],
                   marking: unmarked_t)).

isc --->
  word_t,
  synsem:loc:(cat:(head:vform:inf_t,
                   subj: [@noun(_)],
                   comps: [],
                   marking: unmarked_t)).

dal --->
  word_t,
  synsem:loc:(cat:(head:vform:fin_t,
                   subj: [@noun(synsem_t, nom_t)],
                   comps: ([@noun(synsem_t, acc_t), @noun(synsem_t, dat_t)]),
                   marking: unmarked_t)).

dac --->
  word_t,
  synsem:loc:(cat:(head:vform:inf_t,
                   subj: [@noun(synsem_t, _)],
                   comps: ([@noun(synsem_t, acc_t), @noun(synsem_t, dat_t)]),
                   marking: unmarked_t)).

czeka --->
  word_t,
  synsem: (non_clitic_t,
           loc:cat:(head:vform:fin_t,
                    subj: [@noun(nom_t)],
                    comps: [@prep(na_t,none_t)],
                    marking: unmarked_t)).

% NOTE(review): unlike finite `czeka', the infinitive selects no PP
% complement; every other finite/infinitive pair in this block shares its
% COMPS - confirm whether the empty list is intended.
czekac --->
  word_t,
  synsem: (non_clitic_t,
           loc:cat:(head:vform:inf_t,
                    subj: [@noun(_)],
                    comps: [],
                    marking: unmarked_t)).

powiedzial --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(nom_t)],
                  comps: [@s(fin_t,ze_t)],
                  marking: unmarked_t).

lubi --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(non_clitic_t, nom_t)],
                  comps: [@noun(synsem_t, acc_t)],
                  marking: unmarked_t).

lubic --->
  word_t,
  synsem:loc:cat:(head:vform:inf_t,
                  subj: [@noun(_)],
                  comps: [@noun(synsem_t, acc_t)],
                  marking: unmarked_t).

boi --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(non_clitic_t, nom_t)],
                  comps: [@rm, @non_rm(gen_t)],
                  marking: unmarked_t).
bac --->
  word_t,
  synsem:loc:cat:(head:vform:inf_t,
                  subj: [@noun(non_clitic_t, _)],
                  comps: [@rm, @non_rm(gen_t)],
                  marking: unmarked_t).

stoja --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(nom_t)],
                  comps: ([];[@prep(na_loc_t,none_t)]),
                  marking: unmarked_t).

% --- verbs taking an infinitival complement --------------------------------
% In the entries with `| Raised', the tail of the verb's own COMPS list is
% identified with the COMPS list still open on the infinitival complement,
% so those complements become complements of the higher verb.

kazal --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(nom_t)],
                  comps: ([@nps(dat_t, Index), @inf_comp(Index, Raised) | Raised]),
                  marking: unmarked_t).

kazac --->
  word_t,
  synsem:loc:cat:(head:vform:inf_t,
                  subj: [@noun(_)],
                  comps: [@nps(dat_t, Index), @inf_comp(Index, Raised) | Raised],
                  marking: unmarked_t).

% Only non-clitic complements of the infinitive may be raised here
% (Raised is restricted to non_clitic_list).
przeszkodzil --->
  word_t,                % no CC
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(nom_t)],
                  comps: ([@nps(dat_t, Index),
                           @inf_comp(Index, (Raised, non_clitic_list)) | Raised]),
                  marking: unmarked_t).

% NOTE(review): unlike finite `przeszkodzil', the infinitive requires a fully
% saturated infinitival complement (nothing is raised) - confirm that this
% asymmetry is intended.
przeszkodzic --->
  word_t,                % no CC
  synsem:loc:cat:(head:vform:inf_t,
                  subj: [@noun(_)],
                  comps: ([@nps(dat_t, Index), @inf_comp(Index, [])]),
                  marking: unmarked_t).

obiecal --->
  word_t,                % no CC
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@nps(nom_t, Index)],
                  comps: [@noun(dat_t), @inf_comp(Index, [])],
                  marking: unmarked_t).

obiecac --->
  word_t,                % no CC
  synsem:loc:cat:(head:vform:inf_t,
                  subj: [@nps(_, Index)],
                  comps: [@noun(dat_t), @inf_comp(Index, [])],
                  marking: unmarked_t).

moze --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@nps(nom_t, Index)],
                  comps: [@inf_comp(Index, Raised) | Raised],
                  marking: unmarked_t).

wyglada --->
  word_t,
  synsem:loc:cat:(head:vform:fin_t,
                  subj: [@noun(nom_t)],
                  comps: [@prep(na_t,none_t)],
                  marking: unmarked_t).

% --- prepositions ----------------------------------------------------------

na --->
  word_t,
  synsem:loc:cat:(head:(pform:na_loc_t, mod:none_t),
                  subj: [],
                  comps: [@noun(loc_t)],
                  marking: unmarked_t).

% MARKING is now stated explicitly, as in the na_loc_t entry above and as
% required of PPs by @prep/2; previously it was left underspecified.
na --->
  word_t,
  synsem:loc:cat:(head:(pform:na_t, mod:none_t),
                  subj: [],
                  comps: ([@noun(acc_t)];[@adjp]),
                  marking: unmarked_t).

% --- adjectives ------------------------------------------------------------

duzym --->
  word_t,
  synsem:loc:cat:(head:(pred:no_t, mod: @noun(loc_t)),
                  subj: [],
                  comps: [],
                  marking: unmarked_t).

zmeczonego --->
  word_t,
  synsem: @adjp.

piekne --->
  word_t,
  synsem:loc:(cat:(head: (adj_t, mod: @noun((nom_t;acc_t))),
                   subj: [],
                   comps: [],
                   marking: unmarked_t)).
% Adverb whose MOD value is a synsem with an adjectival head.
bardzo --->
  word_t,
  synsem:loc:(cat:(head:(adv_t, mod:loc:cat:head:adj_t),
                   subj: [],
                   comps: [],
                   marking: unmarked_t)).

% Marker: its SPEC is a saturated, unmarked finite clause (@s(fin_t,unmarked_t));
% its own MARKING value is ze_t.
ze --->
  word_t,
  synsem:loc:cat:(head: (marker_t, spec: @s(fin_t,unmarked_t)),
                  marking: ze_t,
                  subj: [],
                  comps: []).