########### GUI IMPORTS ################
import streamlit as st

#### IMPORTS FOR AI PIPELINES ###############
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from transformers import AutoModel, T5Tokenizer, T5Model
from transformers import T5ForConditionalGeneration
from langchain.llms import HuggingFacePipeline
import torch
############# Displaying images on the front end #################
# Page-wide configuration: title, icon, layout and the hamburger menu links.
st.set_page_config(page_title="Mockup for single page webapp",
                   page_icon='💻',
                   layout="centered",  # or "wide"
                   initial_sidebar_state="expanded",
                   menu_items={
                       'Get Help': 'https://docs.streamlit.io/library/api-reference',
                       'Report a bug': "https://www.extremelycoolapp.com/bug",
                       'About': "# This is a header. This is an *extremely* cool app!"})

# Load image placeholder from the web
st.image('https://placehold.co/750x150', width=750)

# Set a Descriptive Title
st.title("Your Beautiful App Name")
st.divider()

# Placeholder text shown in the summary box until a real summary is produced.
your_future_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras rhoncus massa sit amet est congue dapibus. Duis dictum ac nulla sit amet sollicitudin. In non metus ac neque vehicula egestas. Vestibulum quis justo id enim vestibulum venenatis. Cras gravida ex vitae dignissim suscipit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Duis efficitur, lorem ut fringilla commodo, lacus orci lobortis turpis, sit amet consequat ante diam ut libero."
st.text_area('Summarized text', your_future_text,
             height=150, key='result')

# Set 2 columns to make the Buttons wider
col1, col2 = st.columns(2)
btn1 = col1.button(" :star: Click ME ", use_container_width=True, type="secondary")
btn2 = col2.button(" :smile: Click ME ", use_container_width=True, type="primary")
if btn1:
    st.warning('You pressed the wrong one!', icon="⚠️")
if btn2:
    # Fixed: success message previously reused the warning icon "⚠️".
    st.success('Good Choice!', icon="✅")
st.divider()
########### GUI IMPORTS ################
import streamlit as st
import ssl

############# Displaying images on the front end #################
# NOTE(review): Streamlit allows only ONE st.set_page_config call per app,
# and this duplicates the call above — these look like two concatenated
# scripts from the same article; confirm this section belongs in its own file.
st.set_page_config(page_title="Summarize and Talk ot your Text",
                   page_icon='📖',
                   layout="centered",  # or "wide"
                   initial_sidebar_state="expanded",
                   menu_items={
                       'Get Help': 'https://docs.streamlit.io/library/api-reference',
                       'Report a bug': "https://www.extremelycoolapp.com/bug",
                       'About': "# This is a header. This is an *extremely* cool app!"})

#### IMPORTS FOR AI PIPELINES ###############
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from transformers import AutoModel, T5Tokenizer, T5Model
from transformers import T5ForConditionalGeneration
from langchain.llms import HuggingFacePipeline
import torch
import datetime

# SET THE MODEL PATH
checkpoint = "./model/"  # it is actually LaMini-Flan-T5-248M
# INITIALIZE TOKENIZER AND MODEL
# this part has been moved inside the AI_SummaryPL function
# Nothing new so far. In the following code blocks we will put the function
# and the interactive Streamlit UI together, and explain the building blocks.
####################################################################### SUMMARIZATION FROM TEXT STRING WITH HUGGINGFACE PIPELINE #######################################################################
def AI_SummaryPL(checkpoint, text, chunks, overlap):
    """Summarize a long text with a local T5 summarization pipeline.

    The text is split into overlapping chunks, each chunk is summarized
    independently, and the partial summaries are concatenated.

    Parameters
    ----------
    checkpoint : str
        Relative path to the model directory,
        example: checkpoint = "/content/model/"  # it is actually LaMini-Flan-T5-248M
    text : str
        A long string (pasted input or a document loaded into a string).
    chunks : int
        Length of the chunks the text is split into.
    overlap : int
        Overlap between consecutive chunks (helps attention/focus retrieval).

    Returns
    -------
    full_summary : str
    delta : datetime.timedelta
        Wall-clock duration of the summarization.
    reduction : str
        Summary/original length ratio formatted as a percentage.

    USAGE EXAMPLE:
    post_summary, post_time, post_percentage = AI_SummaryPL(LaMini, originalText, 3700, 500)
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunks,
        chunk_overlap=overlap,
        length_function=len,
    )
    texts = text_splitter.split_text(text)

    # INITIALIZE TOKENIZER AND MODEL from the local checkpoint directory.
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    base_model = T5ForConditionalGeneration.from_pretrained(
        checkpoint,
        device_map='auto',
        torch_dtype=torch.float32)

    ### INITIALIZING PIPELINE
    pipe_sum = pipeline('summarization',
                        model=base_model,
                        tokenizer=tokenizer,
                        max_length=350,
                        min_length=25)

    ## START TIMER
    start = datetime.datetime.now()  # not used now but useful

    ## START CHUNKING: summarize each chunk and stitch the results together.
    full_summary = ''
    for chunk_text in texts:
        result = pipe_sum(chunk_text)
        full_summary = full_summary + ' ' + result[0]['summary_text']

    ## TIMER STOPPED AND RETURN DURATION
    stop = datetime.datetime.now()  # not used now but useful
    delta = stop - start

    ### Calculating Summarization PERCENTAGE
    reduction = '{:.1%}'.format(len(full_summary) / len(text))
    print(f"Completed in {delta}")
    print("Reduction percentage: ", reduction)
    return full_summary, delta, reduction
def split_text(text, chunks, overlap):
    """Split *text* into overlapping chunks with LangChain's splitter.

    Standalone version of the splitting building block used inside
    AI_SummaryPL. (The original lines ran this at module level with
    undefined `text`/`chunks`/`overlap` names, which raised NameError;
    wrapping it as a function keeps the example runnable.)

    Parameters
    ----------
    text : str
        The long text to split.
    chunks : int
        Chunk size in characters.
    overlap : int
        Overlap between consecutive chunks, in characters.

    Returns
    -------
    list[str]
        The list of chunk strings.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunks,
        chunk_overlap=overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
def start_sum(text):
    """Streamlit callback: summarize *text* and fill in the result widgets.

    Warns if the paste box (st.session_state.copypaste) is empty; otherwise
    runs AI_SummaryPL and writes the summary, timing and reduction stats
    into the page placeholders.

    NOTE(review): relies on module-level names defined elsewhere in the app
    (LaMini, txt, timedelta, text_lenght, redux_bar, down_title) — these are
    Streamlit placeholder widgets / the model path, not visible in this
    chunk; confirm they are created before this callback fires.
    """
    if st.session_state.copypaste == "":
        st.warning('You need to paste some text...', icon="⚠️")
    else:
        with st.spinner('Initializing pipelines...'):
            st.success(' AI process started', icon="🤖")
            print("Starting AI pipelines")
            # chunk size 3700 / overlap 500 match the article's settings
            text_summary, duration, reduction = AI_SummaryPL(LaMini, text, 3700, 500)
            txt.text_area('Summarized text', text_summary, height=350, key='final')
            timedelta.write(f'Completed in {duration}')
            text_lenght.markdown(f"Initial length = {len(text.split(' '))} words / summarization = **{len(text_summary.split(' '))} words**")
            redux_bar.progress(len(text_summary) / len(text), f'Reduction: **{reduction}**')
            down_title.markdown(f"## Download your text Summarization")