实现效果
streamlit
flink-playground.py
文件如下:
import streamlit as st
import io
import contextlib
import sys
import os
import uuid
import subprocess
from jinja2 import Template

# Use the full browser width for the editor widgets.
st.set_page_config(layout="wide")

# Page title.
st.title("Flink SQL")

# Initialise per-session state: every session gets its own random id, which
# is later used to name that session's generated job file.
if "user_id" not in st.session_state:
    st.session_state.user_id = str(uuid.uuid4())

# Input box for the JobManager address the job will be submitted to.
st.session_state.jobmanager_address = st.text_input(
    "JobManager 地址", value="10.50.108.7:48085"
)
# Default settings offered in the configuration editor, one key=value entry
# per line.
default_config = "\n".join(
    (
        "execution.checkpointing.interval=10s",
        "execution.runtime-mode=batch",
        "sql-client.execution.result-mode=table",
        "sql-client.execution.max-table-result.rows=10000",
        "pipeline.auto-watermark-interval=200",
        "pipeline.max-parallelism=10",
        "table.exec.state.ttl=1000",
        "restart-strategy.type=fixed-delay",
        "table.optimizer.join-reorder-enabled=true",
        "table.exec.spill-compression.enabled=true",
        "table.exec.spill-compression.block-size=128kb",
    )
)

# Text area for the configuration entries, pre-filled with the defaults above.
st.session_state.config_input = st.text_area(
    "输入配置项 (格式: key=value)",
    value=default_config,
    height=300,
)

# Large text area for the Flink SQL script itself.
st.session_state.sql_input = st.text_area(
    "输入你的 Flink SQL 代码",
    height=500,
)
# Button that renders the job template with the user's input and submits the
# resulting PyFlink script through `flink run`.
if st.button("执行 Flink Job"):
    # Bound before the try block so the cleanup in `finally` can always refer
    # to it, even when a failure happens before the file name is computed.
    file_name = None
    try:
        # Read the job template.
        with open(
            "/work/template/pyflink-job.py.template", "r", encoding="utf-8"
        ) as template_file:
            template_content = template_file.read()

        # Parse the configuration entries: one key=value pair per line, split
        # on the first '=' only. Lines without '=' or with an empty key are
        # skipped.
        config_dict = {}
        for line in st.session_state.config_input.splitlines():
            key, separator, value = line.partition("=")
            key = key.strip()
            if separator and key:
                config_dict[key] = value.strip()

        # Render the template with the SQL text and the parsed settings.
        template = Template(template_content)
        job_content = template.render(
            sqls=st.session_state.sql_input, config_items=config_dict
        )
        st.text("完整pyflink任务代码")
        st.code(job_content, language='python')

        # Save the rendered job to a temporary file named after the session id.
        file_name = f"flink_job_{st.session_state.user_id}.py"
        with open(file_name, "w", encoding="utf-8") as job_file:
            job_file.write(job_content)

        # Submit with `flink run`, passing the JobManager address. The command
        # is an argument list executed without a shell, so the user-typed
        # address cannot be interpreted as shell syntax.
        command = [
            "flink",
            "run",
            "-m",
            st.session_state.jobmanager_address.strip(),
            "-py",
            file_name,
        ]
        result = subprocess.run(command, capture_output=True, text=True)

        # Show the captured standard output.
        st.text_area("执行结果", value=result.stdout, height=200)

        # Surface failures as well: a non-zero exit code and whatever was
        # written to standard error would otherwise be silently dropped.
        if result.returncode != 0:
            st.error(f"flink run 执行失败,退出码: {result.returncode}")
            if result.stderr:
                st.text_area("错误输出", value=result.stderr, height=200)
    except Exception as e:
        # Top-level UI boundary: report any failure to the user.
        st.error(f"代码执行出错: {e}")
    finally:
        # Remove the temporary job file if it was created.
        if file_name is not None and os.path.exists(file_name):
            os.remove(file_name)
运行:
# Redirect stdout to the log first, then point stderr at it, so that both
# streams end up in .streamlit.log.
nohup streamlit run /work/flink-playground.py --server.port 9999 > .streamlit.log 2>&1 &
模板文件
模板文件(/work/template/pyflink-job.py.template)会根据用户输入的配置项和 SQL,动态渲染出最终提交的 PyFlink 任务代码:
import re
from pyflink.table import EnvironmentSettings, TableEnvironmentdef remove_comments(sql):# 使用正则表达式删除单行注释和多行注释sql = re.sub(r'--.*$', '', sql, flags=re.MULTILINE) # 删除单行注释sql = re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL) # 删除多行注释return sqldef execute_sql_file(table_env, sql_statements):sql_statements = sql_statements.split(';')for sql in sql_statements:# 删除注释sql = remove_comments(sql)sql = sql.strip()if sql:print(f"Executing SQL: {sql}")result = table_env.execute_sql(sql)# if result:# result.print()def main():# 创建 TableEnvironmentenv_settings = EnvironmentSettings.new_instance().in_batch_mode().build()table_env = TableEnvironment.create(env_settings)table_config = table_env.get_config(){% for key, value in config_items.items() %}table_config.get_configuration().set_string("{{ key }}", "{{ value }}"){% endfor %}sqls = """{{ sqls }}"""# 读取 SQL 文件并执行execute_sql_file(table_env, sqls)if __name__ == "__main__":main()