上图所示的函数是将plan_dxl转换为plan_stmt的主入口函数。其主要工作是:创建plan_id_generator、motion_id_generator、param_id_generator以及table_list、subplans_list,并将它们设置到CContextDXLToPlStmt类型的dxl_to_plan_stmt_ctxt中,供后续流程使用;然后以内存池mp、md_accessor、dxl_to_plan_stmt_ctxt和segment数量(gpdb::GetGPSegmentCount())为参数初始化CTranslatorDXLToPlStmt类;最终调用dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL完成DXL -> PlannedStmt的转换。
//---------------------------------------------------------------------------
// @function:
//    COptTasks::ConvertToPlanStmtFromDXL
//
// @doc:
//    Translate a DXL tree into a planned statement.
//
//    Sets up the id generators and the (initially empty) range-table and
//    subplan lists, bundles them into a CContextDXLToPlStmt, and hands that
//    context to a CTranslatorDXLToPlStmt which performs the translation.
//
//    mp                    memory pool passed to the context and translator
//    md_accessor           metadata accessor passed to the translator
//    dxlnode               root of the DXL plan to translate
//    can_set_tag           forwarded to GetPlannedStmtFromDXL
//    distribution_hashops  forwarded to the translation context
//
//    Returns whatever GetPlannedStmtFromDXL produces for dxlnode.
//---------------------------------------------------------------------------
PlannedStmt *
COptTasks::ConvertToPlanStmtFromDXL(CMemoryPool *mp, CMDAccessor *md_accessor,
                                    const CDXLNode *dxlnode, bool can_set_tag,
                                    DistributionHashOpsKind distribution_hashops)
{
    // plan and motion ids start at 1, param ids start at 0
    CIdGenerator plan_id_generator(1 /* ulStartId */);
    CIdGenerator motion_id_generator(1 /* ulStartId */);
    CIdGenerator param_id_generator(0 /* ulStartId */);

    // lists filled in through the context while translating
    List *table_list = NULL;
    List *subplans_list = NULL;

    // the context only stores pointers to the locals above, so it must not
    // outlive this function
    CContextDXLToPlStmt dxl_to_plan_stmt_ctxt(
        mp, &plan_id_generator, &motion_id_generator, &param_id_generator,
        distribution_hashops, &table_list, &subplans_list);

    // translate DXL -> PlannedStmt
    CTranslatorDXLToPlStmt dxl_to_plan_stmt_translator(
        mp, md_accessor, &dxl_to_plan_stmt_ctxt, gpdb::GetGPSegmentCount());

    return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL(dxlnode,
                                                             can_set_tag);
}
CContextDXLToPlStmt
CContextDXLToPlStmt类提供对CIdGenerators的使用和RangeTableEntries、Subplans的访问。【providing access to CIdGenerators (needed to number initplans, motion nodes as well as params), list of RangeTableEntries and Subplans generated so far during DXL–>PlStmt translation.】
CContextDXLToPlStmt构造函数函数签名为 CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter, CIdGenerator *motion_id_counter, CIdGenerator *param_id_counter, DistributionHashOpsKind distribution_hashops, List **rtable_entries_list, List **subplan_entries_list);
为m_plan_id_counter、m_motion_id_counter、m_param_id_counter、m_distribution_hashops、m_subplan_entries_list提供了初始值,初始化m_cte_consumer_info和m_num_partition_selectors_array。
class CContextDXLToPlStmt {
private:CMemoryPool *m_mp;CIdGenerator *m_plan_id_counter; // counter for generating plan ids CIdGenerator *m_motion_id_counter; // counter for generating motion ids CIdGenerator *m_param_id_counter; // counter for generating unique param ids DistributionHashOpsKind m_distribution_hashops; // What operator classes to use for distribution keys? List **m_rtable_entries_list; // list of all rtable entries List **m_subplan_entries_list; // list of all subplan entriesstruct SCTEConsumerInfo{ // cte consumer information List *m_cte_consumer_list; // list of ShareInputScan represent cte consumers SCTEConsumerInfo(List *plan_cte) : m_cte_consumer_list(plan_cte){} // ctorvoid AddCTEPlan(ShareInputScan *share_input_scan) { m_cte_consumer_list = gpdb::LAppend(m_cte_consumer_list, share_input_scan); }~SCTEConsumerInfo() { gpdb::ListFree(m_cte_consumer_list); }};// hash maps mapping ULONG -> SCTEConsumerInfotypedef CHashMap<ULONG, SCTEConsumerInfo, gpos::HashValue<ULONG>, gpos::Equals<ULONG>, CleanupDelete<ULONG>, CleanupDelete<SCTEConsumerInfo> > HMUlCTEConsumerInfo;// hash map of the cte identifiers and the cte consumers with the same cte identifierHMUlCTEConsumerInfo *m_cte_consumer_info;List *m_partitioned_tables_list; // list of oids of partitioned tables ULongPtrArray *m_num_partition_selectors_array; // number of partition selectors for each dynamic scanULONG m_result_relation_index; // index of the target relation in the rtable or 0 if not a DML statementIntoClause *m_into_clause; // into clauseGpPolicy *m_distribution_policy; // CTAS distribution policypublic: CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter,CIdGenerator *motion_id_counter,CIdGenerator *param_id_counter,DistributionHashOpsKind distribution_hashops,List **rtable_entries_list,List **subplan_entries_list); // ctor/dtor~CContextDXLToPlStmt(); // dtorULONG GetNextPlanId() { return m_plan_id_counter->next_id(); }; // retrieve the next plan id ULONG GetCurrentMotionId() { return 
m_motion_id_counter->current_id(); }; // retrieve the current motion id ULONG GetNextMotionId() { return m_motion_id_counter->next_id(); }; // retrieve the next motion idULONG GetCurrentParamId() { return m_param_id_counter->next_id(); }; // retrieve the current parameter id ULONG GetNextParamId() { return m_param_id_counter->current_id(); }; // retrieve the next parameter id// add a newly found CTE consumervoid AddCTEConsumerInfo(ULONG cte_id, ShareInputScan *share_input_scan);// return the list of shared input scan plans representing the CTE consumersList *GetCTEConsumerList(ULONG cte_id) const;List *GetRTableEntriesList() { return (*(m_rtable_entries_list)); }; // return list of range table entries// add a range table entryvoid AddRTE(RangeTblEntry *rte, BOOL is_result_relation = false);// index of result relation in the rtableULONG GetResultRelationIndex() const { return m_result_relation_index; } void AddSubplan(Plan *);List *GetSubplanEntriesList();// return list of partitioned table indexesList *GetPartitionedTablesList() const{ return m_partitioned_tables_list; }// return list containing number of partition selectors for every scan idList *GetNumPartitionSelectorsList() const;// add a partitioned table indexvoid AddPartitionedTable(OID oid);// increment the number of partition selectors for the given scan idvoid IncrementPartitionSelectors(ULONG scan_id);// add CTAS informationvoid AddCtasInfo(IntoClause *into_clause, GpPolicy *distribution_policy) { m_into_clause = into_clause; m_distribution_policy = distribution_policy; };// into clauseIntoClause *GetIntoClause() const{ return m_into_clause; }// CTAS distribution policyGpPolicy * GetDistributionPolicy() const { return m_distribution_policy; }// Get the hash opclass or hash function for given datatype, based on decision made by DetermineDistributionHashOpclasses()Oid GetDistributionHashOpclassForType(Oid typid);Oid GetDistributionHashFuncForType(Oid typid);
};
该类提供如下功能:
1 CIdGenerators的next和current id函数
2 返回RangeTableEntries和subplans generated so far列表,其实就是m_rtable_entries_list和m_subplan_entries_list。
AddRTE函数向m_rtable_entries_list添加RangeTblEntry,如果设置is_result_relation则需要将m_result_relation_index更新为刚加入的RangeTblEntry位置。
//---------------------------------------------------------------------------
// @function:
//    CContextDXLToPlStmt::AddRTE
//
// @doc:
//    Append a range table entry to the range table list.
//
//    The entry's inFromCl flag is set to true for ordinary entries and to
//    false for a result relation. For a result relation the list length
//    after the append (i.e. the position of the new entry) is recorded as
//    the result relation index.
//---------------------------------------------------------------------------
void
CContextDXLToPlStmt::AddRTE(RangeTblEntry *rte, BOOL is_result_relation)
{
    List *&rtable = *m_rtable_entries_list;
    rtable = gpdb::LAppend(rtable, rte);

    if (!is_result_relation)
    {
        rte->inFromCl = true;
        return;
    }

    // result relation: not part of the FROM clause; remember where it sits
    rte->inFromCl = false;
    m_result_relation_index = gpdb::ListLength(rtable);
}
3 AddCTEConsumerInfo需要将share_input_scan包装为cte_plan构造成SCTEConsumerInfo结构体。m_cte_consumer_info是key为cte_id、value为SCTEConsumerInfo的map,因此插入时需要先查找映射关系是否存在。
//---------------------------------------------------------------------------
// @function:
//    CContextDXLToPlStmt::AddCTEConsumerInfo
//
// @doc:
//    Record a newly found CTE consumer under its CTE identifier.
//
//    If the map already has an entry for cte_id, the scan is appended to
//    that entry's consumer list. Otherwise a new one-element list is
//    created and inserted under a freshly allocated copy of the key.
//---------------------------------------------------------------------------
void
CContextDXLToPlStmt::AddCTEConsumerInfo(ULONG cte_id,
                                        ShareInputScan *share_input_scan)
{
    SCTEConsumerInfo *existing_info = m_cte_consumer_info->Find(&cte_id);

    if (NULL == existing_info)
    {
        // first consumer seen for this CTE id: start a new list
        List *consumer_list = ListMake1(share_input_scan);

        // both key and value are allocated from the context's memory pool
        ULONG *map_key = GPOS_NEW(m_mp) ULONG(cte_id);
        m_cte_consumer_info->Insert(
            map_key, GPOS_NEW(m_mp) SCTEConsumerInfo(consumer_list));
    }
    else
    {
        existing_info->AddCTEPlan(share_input_scan);
    }
}
//---------------------------------------------------------------------------
// @function:
//    CContextDXLToPlStmt::GetCTEConsumerList
//
// @doc:
//    Return the list of GPDB plan nodes representing the CTE consumers
//    registered under the given CTE identifier, or NULL when no consumer
//    has been recorded for that identifier.
//---------------------------------------------------------------------------
List *
CContextDXLToPlStmt::GetCTEConsumerList(ULONG cte_id) const
{
    SCTEConsumerInfo *found_info = m_cte_consumer_info->Find(&cte_id);

    if (NULL == found_info)
    {
        return NULL;
    }

    return found_info->m_cte_consumer_list;
}
4 m_partitioned_tables_list存放的是分区表indexes列表,其中存放的是表oid。
// Accessor for m_partitioned_tables_list, the list that
// AddPartitionedTable() appends table oids to.
List *
GetPartitionedTablesList() const
{
    return m_partitioned_tables_list;
}
// Add a partitioned table oid to m_partitioned_tables_list.
// An oid that is already a member of the list is not added again.
void
AddPartitionedTable(OID oid)
{
    const bool already_listed =
        gpdb::ListMemberOid(m_partitioned_tables_list, oid);
    if (already_listed)
    {
        return;
    }

    m_partitioned_tables_list =
        gpdb::LAppendOid(m_partitioned_tables_list, oid);
}
partition_selectors_list存放的是每个scan id所包含的partition selectors数量的列表,而IncrementPartitionSelectors函数则针对给定scan id的partition selectors数量进行递增操作。
// Build and return a list holding the number of partition selectors for
// every scan id. Element i of the returned list is the counter stored at
// index i of m_num_partition_selectors_array; the list is NIL when the
// array is empty.
List *
CContextDXLToPlStmt::GetNumPartitionSelectorsList() const
{
    List *counts_list = NIL;

    const ULONG num_scan_ids = m_num_partition_selectors_array->Size();
    ULONG scan_id = 0;
    while (scan_id < num_scan_ids)
    {
        ULONG *selector_count = (*m_num_partition_selectors_array)[scan_id];
        counts_list = gpdb::LAppendInt(counts_list, *selector_count);
        ++scan_id;
    }

    return counts_list;
}
// increment the number of partition selectors for the given scan id
// Bumps the counter stored at index scan_id of
// m_num_partition_selectors_array, growing the array with zero-initialised
// counters first when scan_id is beyond its current size.
void
CContextDXLToPlStmt::IncrementPartitionSelectors(ULONG scan_id)
{
    // add extra elements to the array if necessary
    const ULONG len = m_num_partition_selectors_array->Size();
    for (ULONG ul = len; ul <= scan_id; ul++)
    {
        // each slot is a separately allocated counter starting at 0
        ULONG *pul = GPOS_NEW(m_mp) ULONG(0);
        m_num_partition_selectors_array->Append(pul);
    }

    // the slot for scan_id exists at this point; increment it in place
    ULONG *ul = (*m_num_partition_selectors_array)[scan_id];
    (*ul)++;
}
CTranslatorDXLToPlStmt
CTranslatorDXLToPlStmt类提供了DXLToPlStmt的转换函数。其构造函数主要是将元数据访问类md_accessor,dxl_to_plstmt_context设置到对应的成员中,同样需要初始化CTranslatorDXLToScalar类,和QueryToDXL流程相似。InitTranslators函数则是初始化对应DXLNode转换为PlStmt的函数。
CTranslatorDXLToPlStmt::CTranslatorDXLToPlStmt(CMemoryPool *mp, CMDAccessor *md_accessor, CContextDXLToPlStmt *dxl_to_plstmt_context, ULONG num_of_segments): m_mp(mp), m_md_accessor(md_accessor), m_dxl_to_plstmt_context(dxl_to_plstmt_context), m_cmd_type(CMD_SELECT), m_is_tgt_tbl_distributed(false), m_result_rel_list(NULL), m_num_of_segments(num_of_segments), m_partition_selector_counter(0){m_translator_dxl_to_scalar = GPOS_NEW(m_mp)CTranslatorDXLToScalar(m_mp, m_md_accessor, m_num_of_segments);InitTranslators();
}void CTranslatorDXLToPlStmt::InitTranslators(){for (ULONG idx = 0; idx < GPOS_ARRAY_SIZE(m_dxlop_translator_func_mapping_array); idx++) m_dxlop_translator_func_mapping_array[idx] = NULL;// array mapping operator type to translator functionstatic const STranslatorMapping dxlop_translator_func_mapping_array[] = {{EdxlopPhysicalTableScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},{EdxlopPhysicalExternalScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},{EdxlopPhysicalMultiExternalScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},{EdxlopPhysicalIndexScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexScan},{EdxlopPhysicalIndexOnlyScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexOnlyScan},{EdxlopPhysicalHashJoin,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLHashJoin},{EdxlopPhysicalNLJoin,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLNLJoin},{EdxlopPhysicalMergeJoin,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMergeJoin},{EdxlopPhysicalMotionGather,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},{EdxlopPhysicalMotionBroadcast,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},{EdxlopPhysicalMotionRedistribute,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},{EdxlopPhysicalMotionRandom,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},{EdxlopPhysicalMotionRoutedDistribute,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},{EdxlopPhysicalLimit,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLLimit},{EdxlopPhysicalAgg, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLAgg},{EdxlopPhysicalWindow,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLWindow},{EdxlopPhysicalSort, 
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSort},{EdxlopPhysicalSubqueryScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSubQueryScan},{EdxlopPhysicalResult,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLResult},{EdxlopPhysicalAppend,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLAppend},{EdxlopPhysicalMaterialize,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMaterialize},{EdxlopPhysicalSequence,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSequence},{EdxlopPhysicalDynamicTableScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynTblScan},{EdxlopPhysicalDynamicIndexScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynIdxScan},{EdxlopPhysicalTVF, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTvf},{EdxlopPhysicalDML, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDml},{EdxlopPhysicalSplit,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSplit},{EdxlopPhysicalRowTrigger,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLRowTrigger},{EdxlopPhysicalAssert,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLAssert},{EdxlopPhysicalCTEProducer,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEProducerToSharedScan},{EdxlopPhysicalCTEConsumer,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEConsumerToSharedScan},{EdxlopPhysicalBitmapTableScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},{EdxlopPhysicalDynamicBitmapTableScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},{EdxlopPhysicalCTAS, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLCtas},{EdxlopPhysicalPartitionSelector,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLPartSelector},{EdxlopPhysicalValuesScan,&gpopt::CTranslatorDXLToPlStmt::TranslateDXLValueScan},};const ULONG num_of_translators =GPOS_ARRAY_SIZE(dxlop_translator_func_mapping_array);for (ULONG idx = 0; idx < num_of_translators; idx++){STranslatorMapping elem = dxlop_translator_func_mapping_array[idx];m_dxlop_translator_func_mapping_array[elem.dxl_op_id] = elem.dxlnode_to_logical_funct;}
}
GetPlannedStmtFromDXL
实现Translate DXL node into a PlannedStmt功能的入口函数:
1 初始化CDXLTranslateContext dxl_translate_ctxt和CDXLTranslationContextArray *ctxt_translation_prev_siblings;
2 调用TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt, ctxt_translation_prev_siblings)进行转换【TranslateDXLOperatorToPlan函数根据dxlnode->GetOperator()->GetDXLOperator()返回的操作符id获取对应的转换函数,并调用该转换函数进行转换】;
3 将所有rtekind为RTE_RELATION的RangeTblEntry的oid(relid)提取出来放到oids_list中;
4 组装planned stmt;5 如果是CMD_SELECT,且dxlnode中的m_direct_dispatch_info不为null,说明该执行计划可以direct dispatch给某个segment,因此需要设置plan中的directDispatch信息(比如dispatch的segment contentId列表),并将planned_stmt->planTree中的Motion节点都设置上directDispatch信息。
// Translate a DXL node into a PlannedStmt.
//
// Steps, in order:
//   1. translate the DXL operator tree into a Plan tree;
//   2. collect the relation oids of all RTE_RELATION range table entries;
//   3. assemble the PlannedStmt from the plan and the data gathered in the
//      translation context;
//   4. choose sequential or parallel dispatch;
//   5. for a SELECT that carries direct dispatch info, copy that info onto
//      the root plan and onto every Motion node in the plan tree.
//
// dxlnode      root DXL node to translate
// can_set_tag  stored into planned_stmt->canSetTag
PlannedStmt *
CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode,
                                              bool can_set_tag)
{
    CDXLTranslateContext dxl_translate_ctxt(m_mp, false);

    // the sibling-context array is only needed while translating the tree
    CDXLTranslationContextArray *ctxt_translation_prev_siblings =
        GPOS_NEW(m_mp) CDXLTranslationContextArray(m_mp);
    Plan *plan = TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt,
                                            ctxt_translation_prev_siblings);
    ctxt_translation_prev_siblings->Release();

    // collect oids from rtable
    List *oids_list = NIL;
    ListCell *lc_rte = NULL;
    ForEach(lc_rte, m_dxl_to_plstmt_context->GetRTableEntriesList())
    {
        RangeTblEntry *pRTE = (RangeTblEntry *) lfirst(lc_rte);

        if (pRTE->rtekind == RTE_RELATION)
        {
            oids_list = gpdb::LAppendOid(oids_list, pRTE->relid);
        }
    }

    // assemble planned stmt
    PlannedStmt *planned_stmt = MakeNode(PlannedStmt);
    planned_stmt->planGen = PLANGEN_OPTIMIZER;

    planned_stmt->rtable = m_dxl_to_plstmt_context->GetRTableEntriesList();
    planned_stmt->subplans = m_dxl_to_plstmt_context->GetSubplanEntriesList();
    planned_stmt->planTree = plan;

    // store partitioned table indexes in planned stmt
    planned_stmt->queryPartOids =
        m_dxl_to_plstmt_context->GetPartitionedTablesList();
    planned_stmt->canSetTag = can_set_tag;
    planned_stmt->relationOids = oids_list;
    planned_stmt->numSelectorsPerScanId =
        m_dxl_to_plstmt_context->GetNumPartitionSelectorsList();

    // motion count is taken as the current motion id minus one
    plan->nMotionNodes = m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;
    planned_stmt->nMotionNodes =
        m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;

    planned_stmt->commandType = m_cmd_type;

    if (0 == plan->nMotionNodes && !m_is_tgt_tbl_distributed)
    {
        // no motion nodes and not a DML on a distributed table
        plan->dispatch = DISPATCH_SEQUENTIAL;
    }
    else
    {
        plan->dispatch = DISPATCH_PARALLEL;
    }

    planned_stmt->resultRelations = m_result_rel_list;
    // GPDB_92_MERGE_FIXME: we really *should* be handling intoClause
    // but currently planner cheats (c.f. createas.c)
    // shift the intoClause handling into planner and re-enable this
    // pplstmt->intoClause = m_pctxdxltoplstmt->Pintocl();
    planned_stmt->intoPolicy =
        m_dxl_to_plstmt_context->GetDistributionPolicy();

    SetInitPlanVariables(planned_stmt);

    if (CMD_SELECT == m_cmd_type &&
        NULL != dxlnode->GetDXLDirectDispatchInfo())
    {
        List *direct_dispatch_segids = TranslateDXLDirectDispatchInfo(
            dxlnode->GetDXLDirectDispatchInfo());
        plan->directDispatch.contentIds = direct_dispatch_segids;
        // direct dispatch is enabled only when the segment id list is non-empty
        plan->directDispatch.isDirectDispatch = (NIL != direct_dispatch_segids);

        if (plan->directDispatch.isDirectDispatch)
        {
            // propagate the root's direct dispatch settings to every Motion
            List *motion_node_list = gpdb::ExtractNodesPlan(
                planned_stmt->planTree, T_Motion,
                true /*descendIntoSubqueries*/);
            ListCell *lc = NULL;
            ForEach(lc, motion_node_list)
            {
                Motion *motion = (Motion *) lfirst(lc);
                motion->plan.directDispatch.isDirectDispatch = true;
                motion->plan.directDispatch.contentIds =
                    plan->directDispatch.contentIds;
            }
        }
    }

    return planned_stmt;
}