diff --git a/zulip/integrations/litellm/README.md b/zulip/integrations/litellm/README.md
new file mode 100644
index 00000000..9d66fcb4
--- /dev/null
+++ b/zulip/integrations/litellm/README.md
@@ -0,0 +1,36 @@
+# Summarize topic
+
+Generate a short summary of the last 100 messages in the topic at the provided URL.
+
+### API Keys
+
+For testing, you need an access token from
+https://huggingface.co/settings/tokens (or set the appropriate
+environment variable with an access token if using a different model).
+
+In `~/.zuliprc`, add a section named `litellm` and set the API key for
+the model you are trying to use. For example:
+
+```
+[litellm]
+HUGGINGFACE_API_KEY=YOUR_API_KEY
+```
+
+### Setup
+
+```bash
+$ pip install -r zulip/integrations/litellm/requirements.txt
+```
+
+Run `zulip/integrations/litellm/summarize-topic` to generate a
+sample summary.
+
+```bash
+$ zulip/integrations/litellm/summarize-topic --help
+usage: summarize-topic [-h] [--url URL] [--model MODEL]
+
+options:
+  -h, --help     show this help message and exit
+  --url URL      The URL to fetch content from
+  --model MODEL  The model name to use for summarization
+```
diff --git a/zulip/integrations/litellm/requirements.txt b/zulip/integrations/litellm/requirements.txt
new file mode 100644
index 00000000..f4d08527
--- /dev/null
+++ b/zulip/integrations/litellm/requirements.txt
@@ -0,0 +1,2 @@
+zulip
+litellm
diff --git a/zulip/integrations/litellm/summarize-topic b/zulip/integrations/litellm/summarize-topic
new file mode 100755
index 00000000..901017b0
--- /dev/null
+++ b/zulip/integrations/litellm/summarize-topic
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import sys
+import urllib.parse
+from configparser import ConfigParser
+
+from litellm import completion  # type: ignore[import-not-found]
+
+import zulip
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--url",
+        type=str,
+        help="The URL to fetch content from",
+        default="https://chat.zulip.org/#narrow/stream/101-design/topic/more.20user.20indicators",
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        help="The model name to use for summarization",
+        default="huggingface/meta-llama/Meta-Llama-3-8B-Instruct",
+    )
+    parser.add_argument(
+        "--max-tokens",
+        type=int,
+        help="The maximum number of tokens permitted in the response",
+        default=100,
+    )
+    parser.add_argument(
+        "--max-messages",
+        type=int,
+        help="The maximum number of messages fetched from the server",
+        default=100,
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print verbose debugging output",
+        default=False,
+    )
+    args = parser.parse_args()
+
+    config_file = zulip.get_default_config_filename()
+    if not config_file:
+        print("Could not find the Zulip configuration file. Please read the provided README.")
+        sys.exit(1)
+
+    client = zulip.Client(config_file=config_file)
+
+    config = ConfigParser()
+    # Make the config parser case-sensitive; otherwise API keys will be
+    # lowercased, which is not supported by litellm.
+    # https://docs.python.org/3/library/configparser.html#configparser.ConfigParser.optionxform
+    config.optionxform = str  # type: ignore[assignment, method-assign]
+
+    with open(config_file) as f:
+        config.read_file(f, config_file)
+
+    # Export every key in the `litellm` section as an environment variable.
+    for key in config["litellm"]:
+        if args.verbose:
+            print("Setting key:", key)
+        os.environ[key] = config["litellm"][key]
+
+    url = args.url
+    model = args.model
+
+    # Decode the channel and topic from the URL fragment; in Zulip narrow
+    # URLs, "." encodes "%", so e.g. ".20" is a percent-encoded space.
+    base_url, narrow_hash = url.split("#")
+    narrow_hash_terms = narrow_hash.split("/")
+    channel = narrow_hash_terms[2].split("-", 1)[1]
+    topic = narrow_hash_terms[4]
+    channel = urllib.parse.unquote(channel.replace(".", "%"))
+    topic = urllib.parse.unquote(topic.replace(".", "%"))
+
+    narrow = [
+        {"operator": "channel", "operand": channel},
+        {"operator": "topic", "operand": topic},
+    ]
+
+    request = {
+        "anchor": "newest",
+        "num_before": args.max_messages,
+        "num_after": 0,
+        "narrow": narrow,
+        # Fetch raw Markdown, not HTML
+        "apply_markdown": False,
+    }
+    result = client.get_messages(request)
+    if result["result"] == "error":
+        print("Failed fetching message history", result)
+        sys.exit(1)
+    messages = result["messages"]
+
+    if len(messages) == 0:
+        print("No messages in conversation to summarize")
+        sys.exit(0)
+
+    formatted_messages = [
+        {"content": f"{message['sender_full_name']}: {message['content']}", "role": "user"}
+        for message in messages
+    ]
+
+    # Provide an instruction if using an `Instruct` model.
+    if "Instruct" in model:
+        formatted_messages.append(
+            {
+                "content": """
+Summarize the above content within 90 words.
+""",
+                "role": "user",
+            }
+        )
+
+    # Send the formatted messages to the LLM for summarization.
+    response = completion(
+        max_tokens=args.max_tokens,
+        model=model,
+        messages=formatted_messages,
+    )
+
+    print("Summarized conversation URL:", url)
+    print(
+        f"Used {response['usage']['total_tokens']} tokens to summarize {len(formatted_messages)} Zulip messages."
+    )
+    print()
+    print(response["choices"][0]["message"]["content"])
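As a usage sketch to go with the README above: the invocation below simply spells out the defaults defined in `summarize-topic`'s argparse options (the chat.zulip.org topic URL, the Meta-Llama-3-8B-Instruct model, and the 100-token / 100-message limits), so running the script with no flags is equivalent. It assumes the `[litellm]` section in `~/.zuliprc` is configured as described; any Zulip topic narrow URL and any litellm-supported model string can be substituted.

```bash
# Summarize the default topic with the default Hugging Face model;
# every value shown here is just the script's built-in default.
$ zulip/integrations/litellm/summarize-topic \
    --url "https://chat.zulip.org/#narrow/stream/101-design/topic/more.20user.20indicators" \
    --model "huggingface/meta-llama/Meta-Llama-3-8B-Instruct" \
    --max-tokens 100 \
    --max-messages 100
```

The printed summary will vary with the model and with the topic's current message history.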